diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 14:31:10 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 14:31:10 -0800 |
commit | b2fe5fa68642860e7de76167c3111623aa0d5de1 (patch) | |
tree | b7f9b89b7039ecefbc35fe3c8e73a6ff972641dd /net | |
parent | a103950e0dd2058df5e8a8d4a915707bdcf205f0 (diff) | |
parent | a54667f6728c2714a400f3c884727da74b6d1717 (diff) | |
download | lwn-b2fe5fa68642860e7de76167c3111623aa0d5de1.tar.gz lwn-b2fe5fa68642860e7de76167c3111623aa0d5de1.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Significantly shrink the core networking routing structures. Result
of http://vger.kernel.org/~davem/seoul2017_netdev_keynote.pdf
2) Add netdevsim driver for testing various offloads, from Jakub
Kicinski.
3) Support cross-chip FDB operations in DSA, from Vivien Didelot.
4) Add a 2nd listener hash table for TCP, similar to what was done for
UDP. From Martin KaFai Lau.
5) Add eBPF based queue selection to tun, from Jason Wang.
6) Lockless qdisc support, from John Fastabend.
7) SCTP stream interleave support, from Xin Long.
8) Smoother TCP receive autotuning, from Eric Dumazet.
9) Lots of erspan tunneling enhancements, from William Tu.
10) Add true function call support to BPF, from Alexei Starovoitov.
11) Add explicit support for GRO HW offloading, from Michael Chan.
12) Support extack generation in more netlink subsystems. From Alexander
Aring, Quentin Monnet, and Jakub Kicinski.
13) Add 1000BaseX, flow control, and EEE support to mvneta driver. From
Russell King.
14) Add flow table abstraction to netfilter, from Pablo Neira Ayuso.
15) Many improvements and simplifications to the NFP driver bpf JIT,
from Jakub Kicinski.
16) Support for ipv6 non-equal cost multipath routing, from Ido
Schimmel.
17) Add resource abstration to devlink, from Arkadi Sharshevsky.
18) Packet scheduler classifier shared filter block support, from Jiri
Pirko.
19) Avoid locking in act_csum, from Davide Caratti.
20) devinet_ioctl() simplifications from Al viro.
21) More TCP bpf improvements from Lawrence Brakmo.
22) Add support for onlink ipv6 route flag, similar to ipv4, from David
Ahern.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1925 commits)
tls: Add support for encryption using async offload accelerator
ip6mr: fix stale iterator
net/sched: kconfig: Remove blank help texts
openvswitch: meter: Use 64-bit arithmetic instead of 32-bit
tcp_nv: fix potential integer overflow in tcpnv_acked
r8169: fix RTL8168EP take too long to complete driver initialization.
qmi_wwan: Add support for Quectel EP06
rtnetlink: enable IFLA_IF_NETNSID for RTM_NEWLINK
ipmr: Fix ptrdiff_t print formatting
ibmvnic: Wait for device response when changing MAC
qlcnic: fix deadlock bug
tcp: release sk_frag.page in tcp_disconnect
ipv4: Get the address of interface correctly.
net_sched: gen_estimator: fix lockdep splat
net: macb: Handle HRESP error
net/mlx5e: IPoIB, Fix copy-paste bug in flow steering refactoring
ipv6: addrconf: break critical section in addrconf_verify_rtnl()
ipv6: change route cache aging logic
i40e/i40evf: Update DESC_NEEDED value to reflect larger value
bnxt_en: cleanup DIM work on device shutdown
...
Diffstat (limited to 'net')
500 files changed, 17287 insertions, 10820 deletions
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 5f1446c9f098..a662ccc166df 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -80,7 +80,6 @@ static int vlan_seq_open(struct inode *inode, struct file *file) } static const struct file_operations vlan_fops = { - .owner = THIS_MODULE, .open = vlan_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -97,7 +96,6 @@ static int vlandev_seq_open(struct inode *inode, struct file *file) } static const struct file_operations vlandev_fops = { - .owner = THIS_MODULE, .open = vlandev_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/Kconfig b/net/Kconfig index 9dba2715919d..37ec8e67af57 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -182,6 +182,7 @@ config BRIDGE_NETFILTER depends on BRIDGE depends on NETFILTER && INET depends on NETFILTER_ADVANCED + select NETFILTER_FAMILY_BRIDGE default m ---help--- Enabling this option will let arptables resp. iptables see bridged @@ -336,23 +337,6 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. -config NET_TCPPROBE - tristate "TCP connection probing" - depends on INET && PROC_FS && KPROBES - ---help--- - This module allows for capturing the changes to TCP connection - state in response to incoming packets. It is used for debugging - TCP congestion avoidance modules. If you don't understand - what was just said, you don't need it: say N. - - Documentation on how to use TCP connection probing can be found - at: - - http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe - - To compile this code as a module, choose M here: the - module will be called tcp_probe. - config NET_DROP_MONITOR tristate "Network packet drop alerting service" depends on INET && TRACEPOINTS diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 309d7dbb36e8..d4c1021e74e1 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -1047,7 +1047,6 @@ static int aarp_seq_open(struct inode *inode, struct file *file) } const struct file_operations atalk_seq_arp_fops = { - .owner = THIS_MODULE, .open = aarp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index af46bc49e1e9..a3bf9d519193 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -226,7 +226,6 @@ static int atalk_seq_socket_open(struct inode *inode, struct file *file) } static const struct file_operations atalk_seq_interface_fops = { - .owner = THIS_MODULE, .open = atalk_seq_interface_open, .read = seq_read, .llseek = seq_lseek, @@ -234,7 +233,6 @@ static const struct file_operations atalk_seq_interface_fops = { }; static const struct file_operations atalk_seq_route_fops = { - .owner = THIS_MODULE, .open = atalk_seq_route_open, .read = seq_read, .llseek = seq_lseek, @@ -242,7 +240,6 @@ static const struct file_operations atalk_seq_route_fops = { }; static const struct file_operations atalk_seq_socket_fops = { - .owner = THIS_MODULE, .open = atalk_seq_socket_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 4e111196f902..fd94bea36ee8 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -824,7 +824,6 @@ static int br2684_proc_open(struct inode *inode, struct file *file) } static const struct file_operations br2684_proc_ops = { - .owner = THIS_MODULE, .open = br2684_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/atm/common.c b/net/atm/common.c index 8f12f1c6fa14..6523f38c4957 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -14,7 +14,7 @@ #include <linux/capability.h> #include <linux/mm.h> #include <linux/sched/signal.h> -#include <linux/time.h> /* struct timeval */ +#include <linux/time64.h> /* 64-bit time for seconds */ #include <linux/skbuff.h> #include <linux/bitops.h> #include <linux/init.h> diff --git a/net/atm/lec.c b/net/atm/lec.c index 6676e3433261..09a1f056712a 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -992,7 +992,6 @@ static int lec_seq_open(struct inode *inode, struct file *file) } static const struct file_operations lec_seq_fops = { - .owner = THIS_MODULE, .open = lec_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 7c6a1cc760a2..31e0dcb970f8 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1089,7 +1089,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) msg->type = SND_MPOA_RES_RQST; msg->content.in_info = entry->ctrl_info; msg_to_mpoad(msg, mpc); - do_gettimeofday(&(entry->reply_wait)); + entry->reply_wait = ktime_get_seconds(); mpc->in_ops->put(entry); return; } @@ -1099,7 +1099,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) msg->type = SND_MPOA_RES_RQST; msg->content.in_info = entry->ctrl_info; msg_to_mpoad(msg, mpc); - do_gettimeofday(&(entry->reply_wait)); + entry->reply_wait = ktime_get_seconds(); mpc->in_ops->put(entry); return; } @@ -1175,8 +1175,9 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) } entry->ctrl_info = msg->content.in_info; - do_gettimeofday(&(entry->tv)); - do_gettimeofday(&(entry->reply_wait)); /* Used in refreshing func from now on */ + entry->time = ktime_get_seconds(); + /* Used in refreshing func from now on */ + entry->reply_wait = ktime_get_seconds(); entry->refresh_time = 0; ddprintk_cont("entry->shortcut = %p\n", entry->shortcut); diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index e01450bb32d6..4bb418313720 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -117,7 +117,7 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip, memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN); entry->ctrl_info.in_dst_ip = dst_ip; - do_gettimeofday(&(entry->tv)); + entry->time = ktime_get_seconds(); entry->retry_time = client->parameters.mpc_p4; entry->count = 1; entry->entry_state = INGRESS_INVALID; @@ -148,7 +148,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc) if (qos != NULL) msg.qos = qos->qos; msg_to_mpoad(&msg, mpc); - do_gettimeofday(&(entry->reply_wait)); + entry->reply_wait = ktime_get_seconds(); entry->entry_state = INGRESS_RESOLVING; } if (entry->shortcut != NULL) @@ -171,7 +171,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc) if (qos != NULL) msg.qos = qos->qos; msg_to_mpoad(&msg, mpc); - do_gettimeofday(&(entry->reply_wait)); + entry->reply_wait = ktime_get_seconds(); } return CLOSED; @@ -227,17 +227,16 @@ static void in_cache_remove_entry(in_cache_entry *entry, static void clear_count_and_expired(struct mpoa_client *client) { in_cache_entry *entry, *next_entry; - struct timeval now; + time64_t now; - do_gettimeofday(&now); + now = ktime_get_seconds(); write_lock_bh(&client->ingress_lock); entry = client->in_cache; while (entry != NULL) { entry->count = 0; next_entry = entry->next; - if ((now.tv_sec - entry->tv.tv_sec) - > entry->ctrl_info.holding_time) { + if ((now - entry->time) > entry->ctrl_info.holding_time) { dprintk("holding time expired, ip = %pI4\n", &entry->ctrl_info.in_dst_ip); client->in_ops->remove_entry(entry, client); @@ -253,35 +252,35 @@ static void check_resolving_entries(struct mpoa_client *client) struct atm_mpoa_qos *qos; in_cache_entry *entry; - struct timeval now; + time64_t now; struct k_message msg; - do_gettimeofday(&now); + now = ktime_get_seconds(); read_lock_bh(&client->ingress_lock); entry = client->in_cache; while (entry != NULL) { if (entry->entry_state == INGRESS_RESOLVING) { - if ((now.tv_sec - entry->hold_down.tv_sec) < - client->parameters.mpc_p6) { + + if ((now - entry->hold_down) + < client->parameters.mpc_p6) { entry = entry->next; /* Entry in hold down */ continue; } - if ((now.tv_sec - entry->reply_wait.tv_sec) > - entry->retry_time) { + if ((now - entry->reply_wait) > entry->retry_time) { entry->retry_time = MPC_C1 * (entry->retry_time); /* * Retry time maximum exceeded, * put entry in hold down. */ if (entry->retry_time > client->parameters.mpc_p5) { - do_gettimeofday(&(entry->hold_down)); + entry->hold_down = ktime_get_seconds(); entry->retry_time = client->parameters.mpc_p4; entry = entry->next; continue; } /* Ask daemon to send a resolution request. */ - memset(&(entry->hold_down), 0, sizeof(struct timeval)); + memset(&entry->hold_down, 0, sizeof(time64_t)); msg.type = SND_MPOA_RES_RTRY; memcpy(msg.MPS_ctrl, client->mps_ctrl_addr, ATM_ESA_LEN); msg.content.in_info = entry->ctrl_info; @@ -289,7 +288,7 @@ static void check_resolving_entries(struct mpoa_client *client) if (qos != NULL) msg.qos = qos->qos; msg_to_mpoad(&msg, client); - do_gettimeofday(&(entry->reply_wait)); + entry->reply_wait = ktime_get_seconds(); } } entry = entry->next; @@ -300,18 +299,18 @@ static void check_resolving_entries(struct mpoa_client *client) /* Call this every MPC-p5 seconds. */ static void refresh_entries(struct mpoa_client *client) { - struct timeval now; + time64_t now; struct in_cache_entry *entry = client->in_cache; ddprintk("refresh_entries\n"); - do_gettimeofday(&now); + now = ktime_get_seconds(); read_lock_bh(&client->ingress_lock); while (entry != NULL) { if (entry->entry_state == INGRESS_RESOLVED) { if (!(entry->refresh_time)) entry->refresh_time = (2 * (entry->ctrl_info.holding_time))/3; - if ((now.tv_sec - entry->reply_wait.tv_sec) > + if ((now - entry->reply_wait) > entry->refresh_time) { dprintk("refreshing an entry.\n"); entry->entry_state = INGRESS_REFRESHING; @@ -480,7 +479,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN); entry->ctrl_info = msg->content.eg_info; - do_gettimeofday(&(entry->tv)); + entry->time = ktime_get_seconds(); entry->entry_state = EGRESS_RESOLVED; dprintk("new_eg_cache_entry cache_id %u\n", ntohl(entry->ctrl_info.cache_id)); @@ -495,7 +494,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time) { - do_gettimeofday(&(entry->tv)); + entry->time = ktime_get_seconds(); entry->entry_state = EGRESS_RESOLVED; entry->ctrl_info.holding_time = holding_time; } @@ -503,17 +502,16 @@ static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time) static void clear_expired(struct mpoa_client *client) { eg_cache_entry *entry, *next_entry; - struct timeval now; + time64_t now; struct k_message msg; - do_gettimeofday(&now); + now = ktime_get_seconds(); write_lock_irq(&client->egress_lock); entry = client->eg_cache; while (entry != NULL) { next_entry = entry->next; - if ((now.tv_sec - entry->tv.tv_sec) - > entry->ctrl_info.holding_time) { + if ((now - entry->time) > entry->ctrl_info.holding_time) { msg.type = SND_EGRESS_PURGE; msg.content.eg_info = entry->ctrl_info; dprintk("egress_cache: holding time expired, cache_id = %u.\n", diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h index 6a266669ebf4..464c4c7f8d1f 100644 --- a/net/atm/mpoa_caches.h +++ b/net/atm/mpoa_caches.h @@ -2,6 +2,7 @@ #ifndef MPOA_CACHES_H #define MPOA_CACHES_H +#include <linux/time64.h> #include <linux/netdevice.h> #include <linux/types.h> #include <linux/atm.h> @@ -16,9 +17,9 @@ void atm_mpoa_init_cache(struct mpoa_client *mpc); typedef struct in_cache_entry { struct in_cache_entry *next; struct in_cache_entry *prev; - struct timeval tv; - struct timeval reply_wait; - struct timeval hold_down; + time64_t time; + time64_t reply_wait; + time64_t hold_down; uint32_t packets_fwded; uint16_t entry_state; uint32_t retry_time; @@ -53,7 +54,7 @@ struct in_cache_ops{ typedef struct eg_cache_entry{ struct eg_cache_entry *next; struct eg_cache_entry *prev; - struct timeval tv; + time64_t time; uint8_t MPS_ctrl_ATM_addr[ATM_ESA_LEN]; struct atm_vcc *shortcut; uint32_t packets_rcvd; diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 8a0c17e1c203..b93cc0f18292 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -8,7 +8,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/proc_fs.h> -#include <linux/time.h> +#include <linux/ktime.h> #include <linux/seq_file.h> #include <linux/uaccess.h> #include <linux/atmmpc.h> @@ -57,7 +57,6 @@ static int parse_qos(const char *buff); * Define allowed FILE OPERATIONS */ static const struct file_operations mpc_file_operations = { - .owner = THIS_MODULE, .open = proc_mpc_open, .read = seq_read, .llseek = seq_lseek, @@ -138,7 +137,7 @@ static int mpc_show(struct seq_file *m, void *v) int i; in_cache_entry *in_entry; eg_cache_entry *eg_entry; - struct timeval now; + time64_t now; unsigned char ip_string[16]; if (v == SEQ_START_TOKEN) { @@ -148,15 +147,17 @@ static int mpc_show(struct seq_file *m, void *v) seq_printf(m, "\nInterface %d:\n\n", mpc->dev_num); seq_printf(m, "Ingress Entries:\nIP address State Holding time Packets fwded VPI VCI\n"); - do_gettimeofday(&now); + now = ktime_get_seconds(); for (in_entry = mpc->in_cache; in_entry; in_entry = in_entry->next) { + unsigned long seconds_delta = now - in_entry->time; + sprintf(ip_string, "%pI4", &in_entry->ctrl_info.in_dst_ip); seq_printf(m, "%-16s%s%-14lu%-12u", ip_string, ingress_state_string(in_entry->entry_state), in_entry->ctrl_info.holding_time - - (now.tv_sec-in_entry->tv.tv_sec), + seconds_delta, in_entry->packets_fwded); if (in_entry->shortcut) seq_printf(m, " %-3d %-3d", @@ -169,13 +170,14 @@ static int mpc_show(struct seq_file *m, void *v) seq_printf(m, "Egress Entries:\nIngress MPC ATM addr\nCache-id State Holding time Packets recvd Latest IP addr VPI VCI\n"); for (eg_entry = mpc->eg_cache; eg_entry; eg_entry = eg_entry->next) { unsigned char *p = eg_entry->ctrl_info.in_MPC_data_ATM_addr; + unsigned long seconds_delta = now - eg_entry->time; + for (i = 0; i < ATM_ESA_LEN; i++) seq_printf(m, "%02x", p[i]); seq_printf(m, "\n%-16lu%s%-14lu%-15u", (unsigned long)ntohl(eg_entry->ctrl_info.cache_id), egress_state_string(eg_entry->entry_state), - (eg_entry->ctrl_info.holding_time - - (now.tv_sec-eg_entry->tv.tv_sec)), + (eg_entry->ctrl_info.holding_time - seconds_delta), eg_entry->packets_rcvd); /* latest IP address */ diff --git a/net/atm/proc.c b/net/atm/proc.c index 642f9272ab95..edc48edc95c1 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -37,7 +37,6 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, size_t count, loff_t *pos); static const struct file_operations proc_atm_dev_ops = { - .owner = THIS_MODULE, .read = proc_dev_atm_read, .llseek = noop_llseek, }; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 06eac1f50c5e..47fdd399626b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1931,7 +1931,6 @@ static int ax25_info_open(struct inode *inode, struct file *file) } static const struct file_operations ax25_info_fops = { - .owner = THIS_MODULE, .open = ax25_info_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 0446b892618a..525558972fd9 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -336,7 +336,6 @@ static int ax25_rt_info_open(struct inode *inode, struct file *file) } const struct file_operations ax25_route_fops = { - .owner = THIS_MODULE, .open = ax25_rt_info_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 83b035f56202..4ebe91ba317a 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -194,7 +194,6 @@ static int ax25_uid_info_open(struct inode *inode, struct file *file) } const struct file_operations ax25_uid_fops = { - .owner = THIS_MODULE, .open = ax25_uid_info_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index b73b96a2854b..c44f6515be5e 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -1,3 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +# +# Marek Lindner, Simon Wunderlich +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of version 2 of the GNU General Public +# License as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. + # # B.A.T.M.A.N meshing protocol # diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 915987bc6d29..022f6e77307b 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,4 +1,4 @@ -# +# SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index 44fd073b7546..80c72c7d3cad 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -37,7 +38,8 @@ char batadv_routing_algo[20] = "BATMAN_IV"; static struct hlist_head batadv_algo_list; /** - * batadv_algo_init - Initialize batman-adv algorithm management data structures + * batadv_algo_init() - Initialize batman-adv algorithm management data + * structures */ void batadv_algo_init(void) { @@ -59,6 +61,12 @@ static struct batadv_algo_ops *batadv_algo_get(char *name) return bat_algo_ops; } +/** + * batadv_algo_register() - Register callbacks for a mesh algorithm + * @bat_algo_ops: mesh algorithm callbacks to add + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) { struct batadv_algo_ops *bat_algo_ops_tmp; @@ -88,6 +96,19 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) return 0; } +/** + * batadv_algo_select() - Select algorithm of soft interface + * @bat_priv: the bat priv with all the soft interface information + * @name: name of the algorithm to select + * + * The algorithm callbacks for the soft interface will be set when the algorithm + * with the correct name was found. Any previous selected algorithm will not be + * deinitialized and the new selected algorithm will also not be initialized. + * It is therefore not allowed to call batadv_algo_select outside the creation + * function of the soft interface. + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_algo_select(struct batadv_priv *bat_priv, char *name) { struct batadv_algo_ops *bat_algo_ops; @@ -102,6 +123,14 @@ int batadv_algo_select(struct batadv_priv *bat_priv, char *name) } #ifdef CONFIG_BATMAN_ADV_DEBUGFS + +/** + * batadv_algo_seq_print_text() - Print the supported algorithms in a seq file + * @seq: seq file to print on + * @offset: not used + * + * Return: always 0 + */ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; @@ -148,7 +177,7 @@ module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, 0644); /** - * batadv_algo_dump_entry - fill in information about one supported routing + * batadv_algo_dump_entry() - fill in information about one supported routing * algorithm * @msg: netlink message to be sent back * @portid: Port to reply to @@ -179,7 +208,7 @@ static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_algo_dump - fill in information about supported routing + * batadv_algo_dump() - fill in information about supported routing * algorithms * @msg: netlink message to be sent back * @cb: Parameters to the netlink request diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 29f6312f9bf1..029221615ba3 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Linus Lüssing diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index bbe8414b6ee7..79e326383726 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -26,7 +27,7 @@ #include <linux/cache.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> @@ -51,6 +52,7 @@ #include <linux/workqueue.h> #include <net/genetlink.h> #include <net/netlink.h> +#include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" @@ -62,7 +64,6 @@ #include "netlink.h" #include "network-coding.h" #include "originator.h" -#include "packet.h" #include "routing.h" #include "send.h" #include "translation-table.h" @@ -72,21 +73,28 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work); /** * enum batadv_dup_status - duplicate status - * @BATADV_NO_DUP: the packet is no duplicate - * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the - * neighbor) - * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor - * @BATADV_PROTECTED: originator is currently protected (after reboot) */ enum batadv_dup_status { + /** @BATADV_NO_DUP: the packet is no duplicate */ BATADV_NO_DUP = 0, + + /** + * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for + * the neighbor) + */ BATADV_ORIG_DUP, + + /** @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor */ BATADV_NEIGH_DUP, + + /** + * @BATADV_PROTECTED: originator is currently protected (after reboot) + */ BATADV_PROTECTED, }; /** - * batadv_ring_buffer_set - update the ring buffer with the given value + * batadv_ring_buffer_set() - update the ring buffer with the given value * @lq_recv: pointer to the ring buffer * @lq_index: index to store the value at * @value: value to store in the ring buffer @@ -98,7 +106,7 @@ static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value) } /** - * batadv_ring_buffer_avg - compute the average of all non-zero values stored + * batadv_ring_buffer_avg() - compute the average of all non-zero values stored * in the given ring buffer * @lq_recv: pointer to the ring buffer * @@ -130,7 +138,7 @@ static u8 batadv_ring_buffer_avg(const u8 lq_recv[]) } /** - * batadv_iv_ogm_orig_free - free the private resources allocated for this + * batadv_iv_ogm_orig_free() - free the private resources allocated for this * orig_node * @orig_node: the orig_node for which the resources have to be free'd */ @@ -141,8 +149,8 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node) } /** - * batadv_iv_ogm_orig_add_if - change the private structures of the orig_node to - * include the new hard-interface + * batadv_iv_ogm_orig_add_if() - change the private structures of the orig_node + * to include the new hard-interface * @orig_node: the orig_node that has to be changed * @max_if_num: the current amount of interfaces * @@ -186,7 +194,7 @@ unlock: } /** - * batadv_iv_ogm_drop_bcast_own_entry - drop section of bcast_own + * batadv_iv_ogm_drop_bcast_own_entry() - drop section of bcast_own * @orig_node: the orig_node that has to be changed * @max_if_num: the current amount of interfaces * @del_if_num: the index of the interface being removed @@ -224,7 +232,7 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, } /** - * batadv_iv_ogm_drop_bcast_own_sum_entry - drop section of bcast_own_sum + * batadv_iv_ogm_drop_bcast_own_sum_entry() - drop section of bcast_own_sum * @orig_node: the orig_node that has to be changed * @max_if_num: the current amount of interfaces * @del_if_num: the index of the interface being removed @@ -259,8 +267,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, } /** - * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to - * exclude the removed interface + * batadv_iv_ogm_orig_del_if() - change the private structures of the orig_node + * to exclude the removed interface * @orig_node: the orig_node that has to be changed * @max_if_num: the current amount of interfaces * @del_if_num: the index of the interface being removed @@ -290,7 +298,8 @@ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, } /** - * batadv_iv_ogm_orig_get - retrieve or create (if does not exist) an originator + * batadv_iv_ogm_orig_get() - retrieve or create (if does not exist) an + * originator * @bat_priv: the bat priv with all the soft interface information * @addr: mac address of the originator * @@ -447,7 +456,7 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv) } /** - * batadv_iv_ogm_aggr_packet - checks if there is another OGM attached + * batadv_iv_ogm_aggr_packet() - checks if there is another OGM attached * @buff_pos: current position in the skb * @packet_len: total length of the skb * @tvlv_len: tvlv length of the previously considered OGM @@ -557,7 +566,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) } /** - * batadv_iv_ogm_can_aggregate - find out if an OGM can be aggregated on an + * batadv_iv_ogm_can_aggregate() - find out if an OGM can be aggregated on an * existing forward packet * @new_bat_ogm_packet: OGM packet to be aggregated * @bat_priv: the bat priv with all the soft interface information @@ -660,7 +669,7 @@ out: } /** - * batadv_iv_ogm_aggregate_new - create a new aggregated packet and add this + * batadv_iv_ogm_aggregate_new() - create a new aggregated packet and add this * packet to it. * @packet_buff: pointer to the OGM * @packet_len: (total) length of the OGM @@ -743,7 +752,7 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, } /** - * batadv_iv_ogm_queue_add - queue up an OGM for transmission + * batadv_iv_ogm_queue_add() - queue up an OGM for transmission * @bat_priv: the bat priv with all the soft interface information * @packet_buff: pointer to the OGM * @packet_len: (total) length of the OGM @@ -869,8 +878,8 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, } /** - * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for - * the given interface + * batadv_iv_ogm_slide_own_bcast_window() - bitshift own OGM broadcast windows + * for the given interface * @hard_iface: the interface for which the windows have to be shifted */ static void @@ -987,7 +996,7 @@ out: } /** - * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an + * batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an * originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: the orig node who originally emitted the ogm packet @@ -1152,7 +1161,7 @@ out: } /** - * batadv_iv_ogm_calc_tq - calculate tq for current received ogm packet + * batadv_iv_ogm_calc_tq() - calculate tq for current received ogm packet * @orig_node: the orig node who originally emitted the ogm packet * @orig_neigh_node: the orig node struct of the neighbor who sent the packet * @batadv_ogm_packet: the ogm packet @@ -1298,7 +1307,7 @@ out: } /** - * batadv_iv_ogm_update_seqnos - process a batman packet for all interfaces, + * batadv_iv_ogm_update_seqnos() - process a batman packet for all interfaces, * adjust the sequence number and find out whether it is a duplicate * @ethhdr: ethernet header of the packet * @batadv_ogm_packet: OGM packet to be considered @@ -1401,7 +1410,8 @@ out: } /** - * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if + * batadv_iv_ogm_process_per_outif() - process a batman iv OGM for an outgoing + * interface * @skb: the skb containing the OGM * @ogm_offset: offset from skb->data to start of ogm header * @orig_node: the (cached) orig node for the originator of this OGM @@ -1608,7 +1618,7 @@ out: } /** - * batadv_iv_ogm_process - process an incoming batman iv OGM + * batadv_iv_ogm_process() - process an incoming batman iv OGM * @skb: the skb containing the OGM * @ogm_offset: offset to the OGM which should be processed (for aggregates) * @if_incoming: the interface where this packet was receved @@ -1861,7 +1871,7 @@ free_skb: #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table + * batadv_iv_ogm_orig_print_neigh() - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed * @if_outgoing: outgoing interface for these entries * @seq: debugfs table seq_file struct @@ -1890,7 +1900,7 @@ batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node, } /** - * batadv_iv_ogm_orig_print - print the originator table + * batadv_iv_ogm_orig_print() - print the originator table * @bat_priv: the bat priv with all the soft interface information * @seq: debugfs table seq_file struct * @if_outgoing: the outgoing interface for which this should be printed @@ -1960,7 +1970,7 @@ next: #endif /** - * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a + * batadv_iv_ogm_neigh_get_tq_avg() - Get the TQ average for a neighbour on a * given outgoing interface. * @neigh_node: Neighbour of interest * @if_outgoing: Outgoing interface of interest @@ -1986,7 +1996,7 @@ batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node, } /** - * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a + * batadv_iv_ogm_orig_dump_subentry() - Dump an originator subentry into a * message * @msg: Netlink message to dump into * @portid: Port making netlink request @@ -2048,7 +2058,7 @@ batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message + * batadv_iv_ogm_orig_dump_entry() - Dump an originator entry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -2110,7 +2120,7 @@ batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a + * batadv_iv_ogm_orig_dump_bucket() - Dump an originator bucket into a * message * @msg: Netlink message to dump into * @portid: Port making netlink request @@ -2153,7 +2163,7 @@ batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_iv_ogm_orig_dump - Dump the originators into a message + * batadv_iv_ogm_orig_dump() - Dump the originators into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information @@ -2190,7 +2200,7 @@ batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_iv_hardif_neigh_print - print a single hop neighbour node + * batadv_iv_hardif_neigh_print() - print a single hop neighbour node * @seq: neighbour table seq_file struct * @hardif_neigh: hardif neighbour information */ @@ -2209,7 +2219,7 @@ batadv_iv_hardif_neigh_print(struct seq_file *seq, } /** - * batadv_iv_ogm_neigh_print - print the single hop neighbour list + * batadv_iv_ogm_neigh_print() - print the single hop neighbour list * @bat_priv: the bat priv with all the soft interface information * @seq: neighbour table seq_file struct */ @@ -2242,7 +2252,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, #endif /** - * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors + * batadv_iv_ogm_neigh_diff() - calculate tq difference of two neighbors * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison @@ -2287,7 +2297,7 @@ out: } /** - * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message + * batadv_iv_ogm_neigh_dump_neigh() - Dump a neighbour into a netlink message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -2326,7 +2336,7 @@ batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface + * batadv_iv_ogm_neigh_dump_hardif() - Dump the neighbours of a hard interface * into a message * @msg: Netlink message to dump into * @portid: Port making netlink request @@ -2365,7 +2375,7 @@ batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message + * batadv_iv_ogm_neigh_dump() - Dump the neighbours into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information @@ -2417,7 +2427,7 @@ batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, } /** - * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors + * batadv_iv_ogm_neigh_cmp() - compare the metrics of two neighbors * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison @@ -2443,7 +2453,7 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, } /** - * batadv_iv_ogm_neigh_is_sob - check if neigh1 is similarly good or better + * batadv_iv_ogm_neigh_is_sob() - check if neigh1 is similarly good or better * than neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor @@ -2478,7 +2488,7 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) } /** - * batadv_iv_init_sel_class - initialize GW selection class + * batadv_iv_init_sel_class() - initialize GW selection class * @bat_priv: the bat priv with all the soft interface information */ static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv) @@ -2703,7 +2713,7 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv, #endif /** - * batadv_iv_gw_dump_entry - Dump a gateway into a message + * batadv_iv_gw_dump_entry() - Dump a gateway into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -2774,7 +2784,7 @@ out: } /** - * batadv_iv_gw_dump - Dump gateways into a message + * batadv_iv_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information @@ -2843,6 +2853,11 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { }, }; +/** + * batadv_iv_init() - B.A.T.M.A.N. IV initialization function + * + * Return: 0 on success or negative error number in case of failure + */ int __init batadv_iv_init(void) { int ret; diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h index ae2ab526bdb1..9dc0dd5c83df 100644 --- a/net/batman-adv/bat_iv_ogm.h +++ b/net/batman-adv/bat_iv_ogm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index e0e2bfcd6b3e..27e165ac9302 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner @@ -36,6 +37,7 @@ #include <linux/workqueue.h> #include <net/genetlink.h> #include <net/netlink.h> +#include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" @@ -48,7 +50,6 @@ #include "log.h" #include "netlink.h" #include "originator.h" -#include "packet.h" struct sk_buff; @@ -99,7 +100,7 @@ static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface) } /** - * batadv_v_iface_update_mac - react to hard-interface MAC address change + * batadv_v_iface_update_mac() - react to hard-interface MAC address change * @hard_iface: the modified interface * * If the modified interface is the primary one, update the originator @@ -130,7 +131,7 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_v_orig_print_neigh - print neighbors for the originator table + * batadv_v_orig_print_neigh() - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed * @if_outgoing: outgoing interface for these entries * @seq: debugfs table seq_file struct @@ -160,7 +161,7 @@ batadv_v_orig_print_neigh(struct batadv_orig_node *orig_node, } /** - * batadv_v_hardif_neigh_print - print a single ELP neighbour node + * batadv_v_hardif_neigh_print() - print a single ELP neighbour node * @seq: neighbour table seq_file struct * @hardif_neigh: hardif neighbour information */ @@ -181,7 +182,7 @@ batadv_v_hardif_neigh_print(struct seq_file *seq, } /** - * batadv_v_neigh_print - print the single hop neighbour list + * batadv_v_neigh_print() - print the single hop neighbour list * @bat_priv: the bat priv with all the soft interface information * @seq: neighbour table seq_file struct */ @@ -215,7 +216,7 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv, #endif /** - * batadv_v_neigh_dump_neigh - Dump a neighbour into a message + * batadv_v_neigh_dump_neigh() - Dump a neighbour into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -258,7 +259,7 @@ batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into + * batadv_v_neigh_dump_hardif() - Dump the neighbours of a hard interface into * a message * @msg: Netlink message to dump into * @portid: Port making netlink request @@ -296,7 +297,7 @@ batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_v_neigh_dump - Dump the neighbours of a hard interface into a + * batadv_v_neigh_dump() - Dump the neighbours of a hard interface into a * message * @msg: Netlink message to dump into * @cb: Control block containing additional options @@ -348,7 +349,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_v_orig_print - print the originator table + * batadv_v_orig_print() - print the originator table * @bat_priv: the bat priv with all the soft interface information * @seq: debugfs table seq_file struct * @if_outgoing: the outgoing interface for which this should be printed @@ -416,8 +417,7 @@ next: #endif /** - * batadv_v_orig_dump_subentry - Dump an originator subentry into a - * message + * batadv_v_orig_dump_subentry() - Dump an originator subentry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -483,7 +483,7 @@ batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_v_orig_dump_entry - Dump an originator entry into a message + * batadv_v_orig_dump_entry() - Dump an originator entry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -536,8 +536,7 @@ batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_v_orig_dump_bucket - Dump an originator bucket into a - * message + * batadv_v_orig_dump_bucket() - Dump an originator bucket into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -578,7 +577,7 @@ batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_v_orig_dump - Dump the originators into a message + * batadv_v_orig_dump() - Dump the originators into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information @@ -668,7 +667,7 @@ err_ifinfo1: } /** - * batadv_v_init_sel_class - initialize GW selection class + * batadv_v_init_sel_class() - initialize GW selection class * @bat_priv: the bat priv with all the soft interface information */ static void batadv_v_init_sel_class(struct batadv_priv *bat_priv) @@ -704,7 +703,7 @@ static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff) } /** - * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW + * batadv_v_gw_throughput_get() - retrieve the GW-bandwidth for a given GW * @gw_node: the GW to retrieve the metric for * @bw: the pointer where the metric will be stored. The metric is computed as * the minimum between the GW advertised throughput and the path throughput to @@ -747,7 +746,7 @@ out: } /** - * batadv_v_gw_get_best_gw_node - retrieve the best GW node + * batadv_v_gw_get_best_gw_node() - retrieve the best GW node * @bat_priv: the bat priv with all the soft interface information * * Return: the GW node having the best GW-metric, NULL if no GW is known @@ -785,7 +784,7 @@ next: } /** - * batadv_v_gw_is_eligible - check if a originator would be selected as GW + * batadv_v_gw_is_eligible() - check if a originator would be selected as GW * @bat_priv: the bat priv with all the soft interface information * @curr_gw_orig: originator representing the currently selected GW * @orig_node: the originator representing the new candidate @@ -884,7 +883,7 @@ out: } /** - * batadv_v_gw_print - print the gateway list + * batadv_v_gw_print() - print the gateway list * @bat_priv: the bat priv with all the soft interface information * @seq: gateway table seq_file struct */ @@ -913,7 +912,7 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv, #endif /** - * batadv_v_gw_dump_entry - Dump a gateway into a message + * batadv_v_gw_dump_entry() - Dump a gateway into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -1004,7 +1003,7 @@ out: } /** - * batadv_v_gw_dump - Dump gateways into a message + * batadv_v_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information @@ -1074,7 +1073,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { }; /** - * batadv_v_hardif_init - initialize the algorithm specific fields in the + * batadv_v_hardif_init() - initialize the algorithm specific fields in the * hard-interface object * @hard_iface: the hard-interface to initialize */ @@ -1088,7 +1087,7 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) } /** - * batadv_v_mesh_init - initialize the B.A.T.M.A.N. V private resources for a + * batadv_v_mesh_init() - initialize the B.A.T.M.A.N. V private resources for a * mesh * @bat_priv: the object representing the mesh interface to initialise * @@ -1106,7 +1105,7 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv) } /** - * batadv_v_mesh_free - free the B.A.T.M.A.N. V private resources for a mesh + * batadv_v_mesh_free() - free the B.A.T.M.A.N. V private resources for a mesh * @bat_priv: the object representing the mesh interface to free */ void batadv_v_mesh_free(struct batadv_priv *bat_priv) @@ -1115,7 +1114,7 @@ void batadv_v_mesh_free(struct batadv_priv *bat_priv) } /** - * batadv_v_init - B.A.T.M.A.N. V initialization function + * batadv_v_init() - B.A.T.M.A.N. V initialization function * * Description: Takes care of initializing all the subcomponents. * It is invoked upon module load only. diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h index dd7c4b647e6b..a17ab68bbce8 100644 --- a/net/batman-adv/bat_v.h +++ b/net/batman-adv/bat_v.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Linus Lüssing diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 1de992c58b35..a83478c46597 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner @@ -24,7 +25,7 @@ #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kernel.h> @@ -41,18 +42,18 @@ #include <linux/types.h> #include <linux/workqueue.h> #include <net/cfg80211.h> +#include <uapi/linux/batadv_packet.h> #include "bat_algo.h" #include "bat_v_ogm.h" #include "hard-interface.h" #include "log.h" #include "originator.h" -#include "packet.h" #include "routing.h" #include "send.h" /** - * batadv_v_elp_start_timer - restart timer for ELP periodic work + * batadv_v_elp_start_timer() - restart timer for ELP periodic work * @hard_iface: the interface for which the timer has to be reset */ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface) @@ -67,7 +68,7 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface) } /** - * batadv_v_elp_get_throughput - get the throughput towards a neighbour + * batadv_v_elp_get_throughput() - get the throughput towards a neighbour * @neigh: the neighbour for which the throughput has to be obtained * * Return: The throughput towards the given neighbour in multiples of 100kpbs @@ -153,8 +154,8 @@ default_throughput: } /** - * batadv_v_elp_throughput_metric_update - worker updating the throughput metric - * of a single hop neighbour + * batadv_v_elp_throughput_metric_update() - worker updating the throughput + * metric of a single hop neighbour * @work: the work queue item */ void batadv_v_elp_throughput_metric_update(struct work_struct *work) @@ -177,7 +178,7 @@ void batadv_v_elp_throughput_metric_update(struct work_struct *work) } /** - * batadv_v_elp_wifi_neigh_probe - send link probing packets to a neighbour + * batadv_v_elp_wifi_neigh_probe() - send link probing packets to a neighbour * @neigh: the neighbour to probe * * Sends a predefined number of unicast wifi packets to a given neighbour in @@ -240,7 +241,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) } /** - * batadv_v_elp_periodic_work - ELP periodic task per interface + * batadv_v_elp_periodic_work() - ELP periodic task per interface * @work: work queue item * * Emits broadcast ELP message in regular intervals. @@ -327,7 +328,7 @@ out: } /** - * batadv_v_elp_iface_enable - setup the ELP interface private resources + * batadv_v_elp_iface_enable() - setup the ELP interface private resources * @hard_iface: interface for which the data has to be prepared * * Return: 0 on success or a -ENOMEM in case of failure. @@ -375,7 +376,7 @@ out: } /** - * batadv_v_elp_iface_disable - release ELP interface private resources + * batadv_v_elp_iface_disable() - release ELP interface private resources * @hard_iface: interface for which the resources have to be released */ void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface) @@ -387,7 +388,7 @@ void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface) } /** - * batadv_v_elp_iface_activate - update the ELP buffer belonging to the given + * batadv_v_elp_iface_activate() - update the ELP buffer belonging to the given * hard-interface * @primary_iface: the new primary interface * @hard_iface: interface holding the to-be-updated buffer @@ -408,7 +409,7 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface, } /** - * batadv_v_elp_primary_iface_set - change internal data to reflect the new + * batadv_v_elp_primary_iface_set() - change internal data to reflect the new * primary interface * @primary_iface: the new primary interface */ @@ -428,7 +429,7 @@ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface) } /** - * batadv_v_elp_neigh_update - update an ELP neighbour node + * batadv_v_elp_neigh_update() - update an ELP neighbour node * @bat_priv: the bat priv with all the soft interface information * @neigh_addr: the neighbour interface address * @if_incoming: the interface the packet was received through @@ -488,7 +489,7 @@ orig_free: } /** - * batadv_v_elp_packet_recv - main ELP packet handler + * batadv_v_elp_packet_recv() - main ELP packet handler * @skb: the received packet * @if_incoming: the interface this packet was received through * diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index 376ead280ab9..5e39d0588a48 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index c251445a42a0..ba59b77c605d 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: * * Antonio Quartulli @@ -22,7 +23,7 @@ #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kernel.h> @@ -38,20 +39,20 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/workqueue.h> +#include <uapi/linux/batadv_packet.h> #include "bat_algo.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "originator.h" -#include "packet.h" #include "routing.h" #include "send.h" #include "translation-table.h" #include "tvlv.h" /** - * batadv_v_ogm_orig_get - retrieve and possibly create an originator node + * batadv_v_ogm_orig_get() - retrieve and possibly create an originator node * @bat_priv: the bat priv with all the soft interface information * @addr: the address of the originator * @@ -88,7 +89,7 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, } /** - * batadv_v_ogm_start_timer - restart the OGM sending timer + * batadv_v_ogm_start_timer() - restart the OGM sending timer * @bat_priv: the bat priv with all the soft interface information */ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) @@ -107,7 +108,7 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_v_ogm_send_to_if - send a batman ogm using a given interface + * batadv_v_ogm_send_to_if() - send a batman ogm using a given interface * @skb: the OGM to send * @hard_iface: the interface to use to send the OGM */ @@ -127,7 +128,7 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, } /** - * batadv_v_ogm_send - periodic worker broadcasting the own OGM + * batadv_v_ogm_send() - periodic worker broadcasting the own OGM * @work: work queue item */ static void batadv_v_ogm_send(struct work_struct *work) @@ -235,7 +236,7 @@ out: } /** - * batadv_v_ogm_iface_enable - prepare an interface for B.A.T.M.A.N. V + * batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. V * @hard_iface: the interface to prepare * * Takes care of scheduling own OGM sending routine for this interface. @@ -252,7 +253,7 @@ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface) } /** - * batadv_v_ogm_primary_iface_set - set a new primary interface + * batadv_v_ogm_primary_iface_set() - set a new primary interface * @primary_iface: the new primary interface */ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) @@ -268,8 +269,8 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) } /** - * batadv_v_forward_penalty - apply a penalty to the throughput metric forwarded - * with B.A.T.M.A.N. V OGMs + * batadv_v_forward_penalty() - apply a penalty to the throughput metric + * forwarded with B.A.T.M.A.N. V OGMs * @bat_priv: the bat priv with all the soft interface information * @if_incoming: the interface where the OGM has been received * @if_outgoing: the interface where the OGM has to be forwarded to @@ -314,7 +315,7 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, } /** - * batadv_v_ogm_forward - check conditions and forward an OGM to the given + * batadv_v_ogm_forward() - check conditions and forward an OGM to the given * outgoing interface * @bat_priv: the bat priv with all the soft interface information * @ogm_received: previously received OGM to be forwarded @@ -405,7 +406,7 @@ out: } /** - * batadv_v_ogm_metric_update - update route metric based on OGM + * batadv_v_ogm_metric_update() - update route metric based on OGM * @bat_priv: the bat priv with all the soft interface information * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received @@ -490,7 +491,7 @@ out: } /** - * batadv_v_ogm_route_update - update routes based on OGM + * batadv_v_ogm_route_update() - update routes based on OGM * @bat_priv: the bat priv with all the soft interface information * @ethhdr: the Ethernet header of the OGM2 * @ogm2: OGM2 structure @@ -590,7 +591,7 @@ out: } /** - * batadv_v_ogm_process_per_outif - process a batman v OGM for an outgoing if + * batadv_v_ogm_process_per_outif() - process a batman v OGM for an outgoing if * @bat_priv: the bat priv with all the soft interface information * @ethhdr: the Ethernet header of the OGM2 * @ogm2: OGM2 structure @@ -639,7 +640,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, } /** - * batadv_v_ogm_aggr_packet - checks if there is another OGM aggregated + * batadv_v_ogm_aggr_packet() - checks if there is another OGM aggregated * @buff_pos: current position in the skb * @packet_len: total length of the skb * @tvlv_len: tvlv length of the previously considered OGM @@ -659,7 +660,7 @@ static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, } /** - * batadv_v_ogm_process - process an incoming batman v OGM + * batadv_v_ogm_process() - process an incoming batman v OGM * @skb: the skb containing the OGM * @ogm_offset: offset to the OGM which should be processed (for aggregates) * @if_incoming: the interface where this packet was receved @@ -787,7 +788,7 @@ out: } /** - * batadv_v_ogm_packet_recv - OGM2 receiving handler + * batadv_v_ogm_packet_recv() - OGM2 receiving handler * @skb: the received OGM * @if_incoming: the interface where this OGM has been received * @@ -851,7 +852,7 @@ free_skb: } /** - * batadv_v_ogm_init - initialise the OGM2 engine + * batadv_v_ogm_init() - initialise the OGM2 engine * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or a negative error code in case of failure @@ -884,7 +885,7 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv) } /** - * batadv_v_ogm_free - free OGM private resources + * batadv_v_ogm_free() - free OGM private resources * @bat_priv: the bat priv with all the soft interface information */ void batadv_v_ogm_free(struct batadv_priv *bat_priv) diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h index 2068770b542d..6a4c14ccc3c6 100644 --- a/net/batman-adv/bat_v_ogm.h +++ b/net/batman-adv/bat_v_ogm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: * * Antonio Quartulli diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 2b070c7e31da..bdc1ef06e05b 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner @@ -32,7 +33,7 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n) } /** - * batadv_bit_get_packet - receive and process one packet within the sequence + * batadv_bit_get_packet() - receive and process one packet within the sequence * number window * @priv: the bat priv with all the soft interface information * @seq_bits: pointer to the sequence number receive packet diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index cc262c9d97e0..ca9d0753dd6b 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner @@ -26,7 +27,7 @@ #include <linux/types.h> /** - * batadv_test_bit - check if bit is set in the current window + * batadv_test_bit() - check if bit is set in the current window * * @seq_bits: pointer to the sequence number receive packet * @last_seqno: latest sequence number in seq_bits @@ -46,7 +47,12 @@ static inline bool batadv_test_bit(const unsigned long *seq_bits, return test_bit(diff, seq_bits) != 0; } -/* turn corresponding bit on, so we can remember that we got the packet */ +/** + * batadv_set_bit() - Turn corresponding bit on, so we can remember that we got + * the packet + * @seq_bits: bitmap of the packet receive window + * @n: relative sequence number of newly received packet + */ static inline void batadv_set_bit(unsigned long *seq_bits, s32 n) { /* if too old, just drop it */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index cdd8e8e4df0b..fad47853ad3c 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: * * Simon Wunderlich @@ -24,7 +25,7 @@ #include <linux/crc16.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> @@ -49,6 +50,7 @@ #include <net/genetlink.h> #include <net/netlink.h> #include <net/sock.h> +#include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "hard-interface.h" @@ -56,7 +58,6 @@ #include "log.h" #include "netlink.h" #include "originator.h" -#include "packet.h" #include "soft-interface.h" #include "sysfs.h" #include "translation-table.h" @@ -69,7 +70,7 @@ batadv_bla_send_announce(struct batadv_priv *bat_priv, struct batadv_bla_backbone_gw *backbone_gw); /** - * batadv_choose_claim - choose the right bucket for a claim. + * batadv_choose_claim() - choose the right bucket for a claim. * @data: data to hash * @size: size of the hash table * @@ -87,7 +88,7 @@ static inline u32 batadv_choose_claim(const void *data, u32 size) } /** - * batadv_choose_backbone_gw - choose the right bucket for a backbone gateway. + * batadv_choose_backbone_gw() - choose the right bucket for a backbone gateway. * @data: data to hash * @size: size of the hash table * @@ -105,7 +106,7 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) } /** - * batadv_compare_backbone_gw - compare address and vid of two backbone gws + * batadv_compare_backbone_gw() - compare address and vid of two backbone gws * @node: list node of the first entry to compare * @data2: pointer to the second backbone gateway * @@ -129,7 +130,7 @@ static bool batadv_compare_backbone_gw(const struct hlist_node *node, } /** - * batadv_compare_claim - compare address and vid of two claims + * batadv_compare_claim() - compare address and vid of two claims * @node: list node of the first entry to compare * @data2: pointer to the second claims * @@ -153,7 +154,7 @@ static bool batadv_compare_claim(const struct hlist_node *node, } /** - * batadv_backbone_gw_release - release backbone gw from lists and queue for + * batadv_backbone_gw_release() - release backbone gw from lists and queue for * free after rcu grace period * @ref: kref pointer of the backbone gw */ @@ -168,7 +169,7 @@ static void batadv_backbone_gw_release(struct kref *ref) } /** - * batadv_backbone_gw_put - decrement the backbone gw refcounter and possibly + * batadv_backbone_gw_put() - decrement the backbone gw refcounter and possibly * release it * @backbone_gw: backbone gateway to be free'd */ @@ -178,8 +179,8 @@ static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw) } /** - * batadv_claim_release - release claim from lists and queue for free after rcu - * grace period + * batadv_claim_release() - release claim from lists and queue for free after + * rcu grace period * @ref: kref pointer of the claim */ static void batadv_claim_release(struct kref *ref) @@ -204,8 +205,7 @@ static void batadv_claim_release(struct kref *ref) } /** - * batadv_claim_put - decrement the claim refcounter and possibly - * release it + * batadv_claim_put() - decrement the claim refcounter and possibly release it * @claim: claim to be free'd */ static void batadv_claim_put(struct batadv_bla_claim *claim) @@ -214,7 +214,7 @@ static void batadv_claim_put(struct batadv_bla_claim *claim) } /** - * batadv_claim_hash_find - looks for a claim in the claim hash + * batadv_claim_hash_find() - looks for a claim in the claim hash * @bat_priv: the bat priv with all the soft interface information * @data: search data (may be local/static data) * @@ -253,7 +253,7 @@ batadv_claim_hash_find(struct batadv_priv *bat_priv, } /** - * batadv_backbone_hash_find - looks for a backbone gateway in the hash + * batadv_backbone_hash_find() - looks for a backbone gateway in the hash * @bat_priv: the bat priv with all the soft interface information * @addr: the address of the originator * @vid: the VLAN ID @@ -297,7 +297,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, } /** - * batadv_bla_del_backbone_claims - delete all claims for a backbone + * batadv_bla_del_backbone_claims() - delete all claims for a backbone * @backbone_gw: backbone gateway where the claims should be removed */ static void @@ -337,7 +337,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) } /** - * batadv_bla_send_claim - sends a claim frame according to the provided info + * batadv_bla_send_claim() - sends a claim frame according to the provided info * @bat_priv: the bat priv with all the soft interface information * @mac: the mac address to be announced within the claim * @vid: the VLAN ID @@ -457,7 +457,7 @@ out: } /** - * batadv_bla_loopdetect_report - worker for reporting the loop + * batadv_bla_loopdetect_report() - worker for reporting the loop * @work: work queue item * * Throws an uevent, as the loopdetect check function can't do that itself @@ -487,7 +487,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) } /** - * batadv_bla_get_backbone_gw - finds or creates a backbone gateway + * batadv_bla_get_backbone_gw() - finds or creates a backbone gateway * @bat_priv: the bat priv with all the soft interface information * @orig: the mac address of the originator * @vid: the VLAN ID @@ -560,7 +560,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, } /** - * batadv_bla_update_own_backbone_gw - updates the own backbone gw for a VLAN + * batadv_bla_update_own_backbone_gw() - updates the own backbone gw for a VLAN * @bat_priv: the bat priv with all the soft interface information * @primary_if: the selected primary interface * @vid: VLAN identifier @@ -586,7 +586,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, } /** - * batadv_bla_answer_request - answer a bla request by sending own claims + * batadv_bla_answer_request() - answer a bla request by sending own claims * @bat_priv: the bat priv with all the soft interface information * @primary_if: interface where the request came on * @vid: the vid where the request came on @@ -636,7 +636,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, } /** - * batadv_bla_send_request - send a request to repeat claims + * batadv_bla_send_request() - send a request to repeat claims * @backbone_gw: the backbone gateway from whom we are out of sync * * When the crc is wrong, ask the backbone gateway for a full table update. @@ -663,7 +663,7 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) } /** - * batadv_bla_send_announce - Send an announcement frame + * batadv_bla_send_announce() - Send an announcement frame * @bat_priv: the bat priv with all the soft interface information * @backbone_gw: our backbone gateway which should be announced */ @@ -684,7 +684,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, } /** - * batadv_bla_add_claim - Adds a claim in the claim hash + * batadv_bla_add_claim() - Adds a claim in the claim hash * @bat_priv: the bat priv with all the soft interface information * @mac: the mac address of the claim * @vid: the VLAN ID of the frame @@ -774,7 +774,7 @@ claim_free_ref: } /** - * batadv_bla_claim_get_backbone_gw - Get valid reference for backbone_gw of + * batadv_bla_claim_get_backbone_gw() - Get valid reference for backbone_gw of * claim * @claim: claim whose backbone_gw should be returned * @@ -794,7 +794,7 @@ batadv_bla_claim_get_backbone_gw(struct batadv_bla_claim *claim) } /** - * batadv_bla_del_claim - delete a claim from the claim hash + * batadv_bla_del_claim() - delete a claim from the claim hash * @bat_priv: the bat priv with all the soft interface information * @mac: mac address of the claim to be removed * @vid: VLAN id for the claim to be removed @@ -822,7 +822,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, } /** - * batadv_handle_announce - check for ANNOUNCE frame + * batadv_handle_announce() - check for ANNOUNCE frame * @bat_priv: the bat priv with all the soft interface information * @an_addr: announcement mac address (ARP Sender HW address) * @backbone_addr: originator address of the sender (Ethernet source MAC) @@ -880,7 +880,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, } /** - * batadv_handle_request - check for REQUEST frame + * batadv_handle_request() - check for REQUEST frame * @bat_priv: the bat priv with all the soft interface information * @primary_if: the primary hard interface of this batman soft interface * @backbone_addr: backbone address to be requested (ARP sender HW MAC) @@ -913,7 +913,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, } /** - * batadv_handle_unclaim - check for UNCLAIM frame + * batadv_handle_unclaim() - check for UNCLAIM frame * @bat_priv: the bat priv with all the soft interface information * @primary_if: the primary hard interface of this batman soft interface * @backbone_addr: originator address of the backbone (Ethernet source) @@ -951,7 +951,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, } /** - * batadv_handle_claim - check for CLAIM frame + * batadv_handle_claim() - check for CLAIM frame * @bat_priv: the bat priv with all the soft interface information * @primary_if: the primary hard interface of this batman soft interface * @backbone_addr: originator address of the backbone (Ethernet Source) @@ -988,7 +988,7 @@ static bool batadv_handle_claim(struct batadv_priv *bat_priv, } /** - * batadv_check_claim_group - check for claim group membership + * batadv_check_claim_group() - check for claim group membership * @bat_priv: the bat priv with all the soft interface information * @primary_if: the primary interface of this batman interface * @hw_src: the Hardware source in the ARP Header @@ -1063,7 +1063,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, } /** - * batadv_bla_process_claim - Check if this is a claim frame, and process it + * batadv_bla_process_claim() - Check if this is a claim frame, and process it * @bat_priv: the bat priv with all the soft interface information * @primary_if: the primary hard interface of this batman soft interface * @skb: the frame to be checked @@ -1205,7 +1205,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, } /** - * batadv_bla_purge_backbone_gw - Remove backbone gateways after a timeout or + * batadv_bla_purge_backbone_gw() - Remove backbone gateways after a timeout or * immediately * @bat_priv: the bat priv with all the soft interface information * @now: whether the whole hash shall be wiped now @@ -1258,7 +1258,7 @@ purge_now: } /** - * batadv_bla_purge_claims - Remove claims after a timeout or immediately + * batadv_bla_purge_claims() - Remove claims after a timeout or immediately * @bat_priv: the bat priv with all the soft interface information * @primary_if: the selected primary interface, may be NULL if now is set * @now: whether the whole hash shall be wiped now @@ -1316,7 +1316,7 @@ skip: } /** - * batadv_bla_update_orig_address - Update the backbone gateways when the own + * batadv_bla_update_orig_address() - Update the backbone gateways when the own * originator address changes * @bat_priv: the bat priv with all the soft interface information * @primary_if: the new selected primary_if @@ -1372,7 +1372,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, } /** - * batadv_bla_send_loopdetect - send a loopdetect frame + * batadv_bla_send_loopdetect() - send a loopdetect frame * @bat_priv: the bat priv with all the soft interface information * @backbone_gw: the backbone gateway for which a loop should be detected * @@ -1392,7 +1392,7 @@ batadv_bla_send_loopdetect(struct batadv_priv *bat_priv, } /** - * batadv_bla_status_update - purge bla interfaces if necessary + * batadv_bla_status_update() - purge bla interfaces if necessary * @net_dev: the soft interface net device */ void batadv_bla_status_update(struct net_device *net_dev) @@ -1412,7 +1412,7 @@ void batadv_bla_status_update(struct net_device *net_dev) } /** - * batadv_bla_periodic_work - performs periodic bla work + * batadv_bla_periodic_work() - performs periodic bla work * @work: kernel work struct * * periodic work to do: @@ -1517,7 +1517,7 @@ static struct lock_class_key batadv_claim_hash_lock_class_key; static struct lock_class_key batadv_backbone_hash_lock_class_key; /** - * batadv_bla_init - initialize all bla structures + * batadv_bla_init() - initialize all bla structures * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success, < 0 on error. @@ -1579,7 +1579,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv) } /** - * batadv_bla_check_bcast_duplist - Check if a frame is in the broadcast dup. + * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup. * @bat_priv: the bat priv with all the soft interface information * @skb: contains the bcast_packet to be checked * @@ -1652,7 +1652,7 @@ out: } /** - * batadv_bla_is_backbone_gw_orig - Check if the originator is a gateway for + * batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for * the VLAN identified by vid. * @bat_priv: the bat priv with all the soft interface information * @orig: originator mac address @@ -1692,7 +1692,7 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, } /** - * batadv_bla_is_backbone_gw - check if originator is a backbone gw for a VLAN. + * batadv_bla_is_backbone_gw() - check if originator is a backbone gw for a VLAN * @skb: the frame to be checked * @orig_node: the orig_node of the frame * @hdr_size: maximum length of the frame @@ -1726,7 +1726,7 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb, } /** - * batadv_bla_free - free all bla structures + * batadv_bla_free() - free all bla structures * @bat_priv: the bat priv with all the soft interface information * * for softinterface free or module unload @@ -1753,7 +1753,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv) } /** - * batadv_bla_loopdetect_check - check and handle a detected loop + * batadv_bla_loopdetect_check() - check and handle a detected loop * @bat_priv: the bat priv with all the soft interface information * @skb: the packet to check * @primary_if: interface where the request came on @@ -1802,7 +1802,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, } /** - * batadv_bla_rx - check packets coming from the mesh. + * batadv_bla_rx() - check packets coming from the mesh. * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame @@ -1914,7 +1914,7 @@ out: } /** - * batadv_bla_tx - check packets going into the mesh + * batadv_bla_tx() - check packets going into the mesh * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame @@ -2022,7 +2022,7 @@ out: #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file + * batadv_bla_claim_table_seq_print_text() - print the claim table in a seq file * @seq: seq file to print on * @offset: not used * @@ -2084,7 +2084,7 @@ out: #endif /** - * batadv_bla_claim_dump_entry - dump one entry of the claim table + * batadv_bla_claim_dump_entry() - dump one entry of the claim table * to a netlink socket * @msg: buffer for the message * @portid: netlink port @@ -2143,7 +2143,7 @@ out: } /** - * batadv_bla_claim_dump_bucket - dump one bucket of the claim table + * batadv_bla_claim_dump_bucket() - dump one bucket of the claim table * to a netlink socket * @msg: buffer for the message * @portid: netlink port @@ -2180,7 +2180,7 @@ unlock: } /** - * batadv_bla_claim_dump - dump claim table to a netlink socket + * batadv_bla_claim_dump() - dump claim table to a netlink socket * @msg: buffer for the message * @cb: callback structure containing arguments * @@ -2247,8 +2247,8 @@ out: #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq - * file + * batadv_bla_backbone_table_seq_print_text() - print the backbone table in a + * seq file * @seq: seq file to print on * @offset: not used * @@ -2312,8 +2312,8 @@ out: #endif /** - * batadv_bla_backbone_dump_entry - dump one entry of the backbone table - * to a netlink socket + * batadv_bla_backbone_dump_entry() - dump one entry of the backbone table to a + * netlink socket * @msg: buffer for the message * @portid: netlink port * @seq: Sequence number of netlink message @@ -2373,8 +2373,8 @@ out: } /** - * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table - * to a netlink socket + * batadv_bla_backbone_dump_bucket() - dump one bucket of the backbone table to + * a netlink socket * @msg: buffer for the message * @portid: netlink port * @seq: Sequence number of netlink message @@ -2410,7 +2410,7 @@ unlock: } /** - * batadv_bla_backbone_dump - dump backbone table to a netlink socket + * batadv_bla_backbone_dump() - dump backbone table to a netlink socket * @msg: buffer for the message * @cb: callback structure containing arguments * @@ -2477,7 +2477,7 @@ out: #ifdef CONFIG_BATMAN_ADV_DAT /** - * batadv_bla_check_claim - check if address is claimed + * batadv_bla_check_claim() - check if address is claimed * * @bat_priv: the bat priv with all the soft interface information * @addr: mac address of which the claim status is checked diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 234775748b8e..b27571abcd2f 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: * * Simon Wunderlich @@ -30,8 +31,8 @@ struct seq_file; struct sk_buff; /** - * batadv_bla_is_loopdetect_mac - check if the mac address is from a loop detect - * frame sent by bridge loop avoidance + * batadv_bla_is_loopdetect_mac() - check if the mac address is from a loop + * detect frame sent by bridge loop avoidance * @mac: mac address to check * * Return: true if the it looks like a loop detect frame diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index e32ad47c6efd..21d1189957a7 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@ -25,7 +26,6 @@ #include <linux/fs.h> #include <linux/netdevice.h> #include <linux/printk.h> -#include <linux/sched.h> /* for linux/wait.h */ #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/stddef.h> @@ -66,8 +66,8 @@ static int batadv_originators_open(struct inode *inode, struct file *file) } /** - * batadv_originators_hardif_open - handles debugfs output for the - * originator table of an hard interface + * batadv_originators_hardif_open() - handles debugfs output for the originator + * table of an hard interface * @inode: inode pointer to debugfs file * @file: pointer to the seq_file * @@ -117,7 +117,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode, #ifdef CONFIG_BATMAN_ADV_DAT /** - * batadv_dat_cache_open - Prepare file handler for reads from dat_chache + * batadv_dat_cache_open() - Prepare file handler for reads from dat_chache * @inode: inode which was opened * @file: file handle to be initialized * @@ -154,7 +154,7 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file) #ifdef CONFIG_BATMAN_ADV_MCAST /** - * batadv_mcast_flags_open - prepare file handler for reads from mcast_flags + * batadv_mcast_flags_open() - prepare file handler for reads from mcast_flags * @inode: inode which was opened * @file: file handle to be initialized * @@ -259,6 +259,9 @@ static struct batadv_debuginfo *batadv_hardif_debuginfos[] = { NULL, }; +/** + * batadv_debugfs_init() - Initialize soft interface independent debugfs entries + */ void batadv_debugfs_init(void) { struct batadv_debuginfo **bat_debug; @@ -289,6 +292,9 @@ err: batadv_debugfs = NULL; } +/** + * batadv_debugfs_destroy() - Remove all debugfs entries + */ void batadv_debugfs_destroy(void) { debugfs_remove_recursive(batadv_debugfs); @@ -296,7 +302,7 @@ void batadv_debugfs_destroy(void) } /** - * batadv_debugfs_add_hardif - creates the base directory for a hard interface + * batadv_debugfs_add_hardif() - creates the base directory for a hard interface * in debugfs. * @hard_iface: hard interface which should be added. * @@ -338,7 +344,7 @@ out: } /** - * batadv_debugfs_del_hardif - delete the base directory for a hard interface + * batadv_debugfs_del_hardif() - delete the base directory for a hard interface * in debugfs. * @hard_iface: hard interface which is deleted. */ @@ -355,6 +361,12 @@ void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) } } +/** + * batadv_debugfs_add_meshif() - Initialize interface dependent debugfs entries + * @dev: netdev struct of the soft interface + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_debugfs_add_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); @@ -401,6 +413,10 @@ out: return -ENOMEM; } +/** + * batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries + * @dev: netdev struct of the soft interface + */ void batadv_debugfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 9c5d4a65b98c..90a08d35c501 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: * * Marek Lindner diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 760c0de72582..9703c791ffc5 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: * * Antonio Quartulli @@ -23,7 +24,7 @@ #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> @@ -55,7 +56,7 @@ static void batadv_dat_purge(struct work_struct *work); /** - * batadv_dat_start_timer - initialise the DAT periodic worker + * batadv_dat_start_timer() - initialise the DAT periodic worker * @bat_priv: the bat priv with all the soft interface information */ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) @@ -66,7 +67,7 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_dat_entry_release - release dat_entry from lists and queue for free + * batadv_dat_entry_release() - release dat_entry from lists and queue for free * after rcu grace period * @ref: kref pointer of the dat_entry */ @@ -80,7 +81,7 @@ static void batadv_dat_entry_release(struct kref *ref) } /** - * batadv_dat_entry_put - decrement the dat_entry refcounter and possibly + * batadv_dat_entry_put() - decrement the dat_entry refcounter and possibly * release it * @dat_entry: dat_entry to be free'd */ @@ -90,7 +91,7 @@ static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry) } /** - * batadv_dat_to_purge - check whether a dat_entry has to be purged or not + * batadv_dat_to_purge() - check whether a dat_entry has to be purged or not * @dat_entry: the entry to check * * Return: true if the entry has to be purged now, false otherwise. @@ -102,7 +103,7 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) } /** - * __batadv_dat_purge - delete entries from the DAT local storage + * __batadv_dat_purge() - delete entries from the DAT local storage * @bat_priv: the bat priv with all the soft interface information * @to_purge: function in charge to decide whether an entry has to be purged or * not. This function takes the dat_entry as argument and has to @@ -145,8 +146,8 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, } /** - * batadv_dat_purge - periodic task that deletes old entries from the local DAT - * hash table + * batadv_dat_purge() - periodic task that deletes old entries from the local + * DAT hash table * @work: kernel work struct */ static void batadv_dat_purge(struct work_struct *work) @@ -164,7 +165,7 @@ static void batadv_dat_purge(struct work_struct *work) } /** - * batadv_compare_dat - comparing function used in the local DAT hash table + * batadv_compare_dat() - comparing function used in the local DAT hash table * @node: node in the local table * @data2: second object to compare the node to * @@ -179,7 +180,7 @@ static bool batadv_compare_dat(const struct hlist_node *node, const void *data2) } /** - * batadv_arp_hw_src - extract the hw_src field from an ARP packet + * batadv_arp_hw_src() - extract the hw_src field from an ARP packet * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * @@ -196,7 +197,7 @@ static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) } /** - * batadv_arp_ip_src - extract the ip_src field from an ARP packet + * batadv_arp_ip_src() - extract the ip_src field from an ARP packet * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * @@ -208,7 +209,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) } /** - * batadv_arp_hw_dst - extract the hw_dst field from an ARP packet + * batadv_arp_hw_dst() - extract the hw_dst field from an ARP packet * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * @@ -220,7 +221,7 @@ static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) } /** - * batadv_arp_ip_dst - extract the ip_dst field from an ARP packet + * batadv_arp_ip_dst() - extract the ip_dst field from an ARP packet * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * @@ -232,7 +233,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) } /** - * batadv_hash_dat - compute the hash value for an IP address + * batadv_hash_dat() - compute the hash value for an IP address * @data: data to hash * @size: size of the hash table * @@ -267,7 +268,7 @@ static u32 batadv_hash_dat(const void *data, u32 size) } /** - * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash + * batadv_dat_entry_hash_find() - look for a given dat_entry in the local hash * table * @bat_priv: the bat priv with all the soft interface information * @ip: search key @@ -310,7 +311,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, } /** - * batadv_dat_entry_add - add a new dat entry or update it if already exists + * batadv_dat_entry_add() - add a new dat entry or update it if already exists * @bat_priv: the bat priv with all the soft interface information * @ip: ipv4 to add/edit * @mac_addr: mac address to assign to the given ipv4 @@ -367,7 +368,8 @@ out: #ifdef CONFIG_BATMAN_ADV_DEBUG /** - * batadv_dbg_arp - print a debug message containing all the ARP packet details + * batadv_dbg_arp() - print a debug message containing all the ARP packet + * details * @bat_priv: the bat priv with all the soft interface information * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet @@ -448,7 +450,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, #endif /* CONFIG_BATMAN_ADV_DEBUG */ /** - * batadv_is_orig_node_eligible - check whether a node can be a DHT candidate + * batadv_is_orig_node_eligible() - check whether a node can be a DHT candidate * @res: the array with the already selected candidates * @select: number of already selected candidates * @tmp_max: address of the currently evaluated node @@ -502,7 +504,7 @@ out: } /** - * batadv_choose_next_candidate - select the next DHT candidate + * batadv_choose_next_candidate() - select the next DHT candidate * @bat_priv: the bat priv with all the soft interface information * @cands: candidates array * @select: number of candidates already present in the array @@ -566,8 +568,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, } /** - * batadv_dat_select_candidates - select the nodes which the DHT message has to - * be sent to + * batadv_dat_select_candidates() - select the nodes which the DHT message has + * to be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT * @vid: VLAN identifier @@ -612,7 +614,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, } /** - * batadv_dat_send_data - send a payload to the selected candidates + * batadv_dat_send_data() - send a payload to the selected candidates * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @ip: the DHT key @@ -688,7 +690,7 @@ out: } /** - * batadv_dat_tvlv_container_update - update the dat tvlv container after dat + * batadv_dat_tvlv_container_update() - update the dat tvlv container after dat * setting change * @bat_priv: the bat priv with all the soft interface information */ @@ -710,7 +712,7 @@ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv) } /** - * batadv_dat_status_update - update the dat tvlv container after dat + * batadv_dat_status_update() - update the dat tvlv container after dat * setting change * @net_dev: the soft interface net device */ @@ -722,7 +724,7 @@ void batadv_dat_status_update(struct net_device *net_dev) } /** - * batadv_dat_tvlv_ogm_handler_v1 - process incoming dat tvlv container + * batadv_dat_tvlv_ogm_handler_v1() - process incoming dat tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) @@ -741,7 +743,7 @@ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, } /** - * batadv_dat_hash_free - free the local DAT hash table + * batadv_dat_hash_free() - free the local DAT hash table * @bat_priv: the bat priv with all the soft interface information */ static void batadv_dat_hash_free(struct batadv_priv *bat_priv) @@ -757,7 +759,7 @@ static void batadv_dat_hash_free(struct batadv_priv *bat_priv) } /** - * batadv_dat_init - initialise the DAT internals + * batadv_dat_init() - initialise the DAT internals * @bat_priv: the bat priv with all the soft interface information * * Return: 0 in case of success, a negative error code otherwise @@ -782,7 +784,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv) } /** - * batadv_dat_free - free the DAT internals + * batadv_dat_free() - free the DAT internals * @bat_priv: the bat priv with all the soft interface information */ void batadv_dat_free(struct batadv_priv *bat_priv) @@ -797,7 +799,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv) #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_dat_cache_seq_print_text - print the local DAT hash table + * batadv_dat_cache_seq_print_text() - print the local DAT hash table * @seq: seq file to print on * @offset: not used * @@ -850,7 +852,7 @@ out: #endif /** - * batadv_arp_get_type - parse an ARP packet and gets the type + * batadv_arp_get_type() - parse an ARP packet and gets the type * @bat_priv: the bat priv with all the soft interface information * @skb: packet to analyse * @hdr_size: size of the possible header before the ARP packet in the skb @@ -924,7 +926,7 @@ out: } /** - * batadv_dat_get_vid - extract the VLAN identifier from skb if any + * batadv_dat_get_vid() - extract the VLAN identifier from skb if any * @skb: the buffer containing the packet to extract the VID from * @hdr_size: the size of the batman-adv header encapsulating the packet * @@ -950,7 +952,7 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) } /** - * batadv_dat_arp_create_reply - create an ARP Reply + * batadv_dat_arp_create_reply() - create an ARP Reply * @bat_priv: the bat priv with all the soft interface information * @ip_src: ARP sender IP * @ip_dst: ARP target IP @@ -985,7 +987,7 @@ batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src, } /** - * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to + * batadv_dat_snoop_outgoing_arp_request() - snoop the ARP request and try to * answer using DAT * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check @@ -1083,7 +1085,7 @@ out: } /** - * batadv_dat_snoop_incoming_arp_request - snoop the ARP request and try to + * batadv_dat_snoop_incoming_arp_request() - snoop the ARP request and try to * answer using the local DAT storage * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check @@ -1153,7 +1155,7 @@ out: } /** - * batadv_dat_snoop_outgoing_arp_reply - snoop the ARP reply and fill the DHT + * batadv_dat_snoop_outgoing_arp_reply() - snoop the ARP reply and fill the DHT * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check */ @@ -1193,8 +1195,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, } /** - * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local - * DAT storage only + * batadv_dat_snoop_incoming_arp_reply() - snoop the ARP reply and fill the + * local DAT storage only * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check * @hdr_size: size of the encapsulation header @@ -1282,8 +1284,8 @@ out: } /** - * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped - * (because the node has already obtained the reply via DAT) or not + * batadv_dat_drop_broadcast_packet() - check if an ARP request has to be + * dropped (because the node has already obtained the reply via DAT) or not * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the broadcast packet * diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index ec364a3c1c66..12897eb46268 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors: * * Antonio Quartulli @@ -23,9 +24,9 @@ #include <linux/compiler.h> #include <linux/netdevice.h> #include <linux/types.h> +#include <uapi/linux/batadv_packet.h> #include "originator.h" -#include "packet.h" struct seq_file; struct sk_buff; @@ -48,7 +49,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet); /** - * batadv_dat_init_orig_node_addr - assign a DAT address to the orig_node + * batadv_dat_init_orig_node_addr() - assign a DAT address to the orig_node * @orig_node: the node to assign the DAT address to */ static inline void @@ -61,7 +62,7 @@ batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node) } /** - * batadv_dat_init_own_addr - assign a DAT address to the node itself + * batadv_dat_init_own_addr() - assign a DAT address to the node itself * @bat_priv: the bat priv with all the soft interface information * @primary_if: a pointer to the primary interface */ @@ -82,7 +83,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv); int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset); /** - * batadv_dat_inc_counter - increment the correct DAT packet counter + * batadv_dat_inc_counter() - increment the correct DAT packet counter * @bat_priv: the bat priv with all the soft interface information * @subtype: the 4addr subtype of the packet to be counted * diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index ebe6e38934e4..22dde42fd80e 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> @@ -22,7 +23,7 @@ #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kernel.h> @@ -32,16 +33,16 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <uapi/linux/batadv_packet.h> #include "hard-interface.h" #include "originator.h" -#include "packet.h" #include "routing.h" #include "send.h" #include "soft-interface.h" /** - * batadv_frag_clear_chain - delete entries in the fragment buffer chain + * batadv_frag_clear_chain() - delete entries in the fragment buffer chain * @head: head of chain with entries. * @dropped: whether the chain is cleared because all fragments are dropped * @@ -65,7 +66,7 @@ static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped) } /** - * batadv_frag_purge_orig - free fragments associated to an orig + * batadv_frag_purge_orig() - free fragments associated to an orig * @orig_node: originator to free fragments from * @check_cb: optional function to tell if an entry should be purged */ @@ -89,7 +90,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, } /** - * batadv_frag_size_limit - maximum possible size of packet to be fragmented + * batadv_frag_size_limit() - maximum possible size of packet to be fragmented * * Return: the maximum size of payload that can be fragmented. */ @@ -104,7 +105,7 @@ static int batadv_frag_size_limit(void) } /** - * batadv_frag_init_chain - check and prepare fragment chain for new fragment + * batadv_frag_init_chain() - check and prepare fragment chain for new fragment * @chain: chain in fragments table to init * @seqno: sequence number of the received fragment * @@ -134,7 +135,7 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, } /** - * batadv_frag_insert_packet - insert a fragment into a fragment chain + * batadv_frag_insert_packet() - insert a fragment into a fragment chain * @orig_node: originator that the fragment was received from * @skb: skb to insert * @chain_out: list head to attach complete chains of fragments to @@ -248,7 +249,7 @@ err: } /** - * batadv_frag_merge_packets - merge a chain of fragments + * batadv_frag_merge_packets() - merge a chain of fragments * @chain: head of chain with fragments * * Expand the first skb in the chain and copy the content of the remaining @@ -306,7 +307,7 @@ free: } /** - * batadv_frag_skb_buffer - buffer fragment for later merge + * batadv_frag_skb_buffer() - buffer fragment for later merge * @skb: skb to buffer * @orig_node_src: originator that the skb is received from * @@ -346,7 +347,7 @@ out_err: } /** - * batadv_frag_skb_fwd - forward fragments that would exceed MTU when merged + * batadv_frag_skb_fwd() - forward fragments that would exceed MTU when merged * @skb: skb to forward * @recv_if: interface that the skb is received on * @orig_node_src: originator that the skb is received from @@ -400,7 +401,7 @@ out: } /** - * batadv_frag_create - create a fragment from skb + * batadv_frag_create() - create a fragment from skb * @skb: skb to create fragment from * @frag_head: header to use in new fragment * @fragment_size: size of new fragment @@ -438,7 +439,7 @@ err: } /** - * batadv_frag_send_packet - create up to 16 fragments from the passed skb + * batadv_frag_send_packet() - create up to 16 fragments from the passed skb * @skb: skb to create fragments from * @orig_node: final destination of the created fragments * @neigh_node: next-hop of the created fragments diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 1a2d6c308745..138b22a1836a 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> @@ -39,7 +40,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, struct batadv_neigh_node *neigh_node); /** - * batadv_frag_check_entry - check if a list of fragments has timed out + * batadv_frag_check_entry() - check if a list of fragments has timed out * @frags_entry: table entry to check * * Return: true if the frags entry has timed out, false otherwise. diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 10d521f0b17f..37fe9a644f22 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@ -22,7 +23,7 @@ #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/in.h> @@ -42,6 +43,7 @@ #include <linux/stddef.h> #include <linux/udp.h> #include <net/sock.h> +#include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "gateway_common.h" @@ -49,7 +51,6 @@ #include "log.h" #include "netlink.h" #include "originator.h" -#include "packet.h" #include "routing.h" #include "soft-interface.h" #include "sysfs.h" @@ -68,8 +69,8 @@ #define BATADV_DHCP_CHADDR_OFFSET 28 /** - * batadv_gw_node_release - release gw_node from lists and queue for free after - * rcu grace period + * batadv_gw_node_release() - release gw_node from lists and queue for free + * after rcu grace period * @ref: kref pointer of the gw_node */ static void batadv_gw_node_release(struct kref *ref) @@ -83,7 +84,8 @@ static void batadv_gw_node_release(struct kref *ref) } /** - * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it + * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release + * it * @gw_node: gateway node to free */ void batadv_gw_node_put(struct batadv_gw_node *gw_node) @@ -91,6 +93,12 @@ void batadv_gw_node_put(struct batadv_gw_node *gw_node) kref_put(&gw_node->refcount, batadv_gw_node_release); } +/** + * batadv_gw_get_selected_gw_node() - Get currently selected gateway + * @bat_priv: the bat priv with all the soft interface information + * + * Return: selected gateway (with increased refcnt), NULL on errors + */ struct batadv_gw_node * batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv) { @@ -109,6 +117,12 @@ out: return gw_node; } +/** + * batadv_gw_get_selected_orig() - Get originator of currently selected gateway + * @bat_priv: the bat priv with all the soft interface information + * + * Return: orig_node of selected gateway (with increased refcnt), NULL on errors + */ struct batadv_orig_node * batadv_gw_get_selected_orig(struct batadv_priv *bat_priv) { @@ -155,7 +169,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, } /** - * batadv_gw_reselect - force a gateway reselection + * batadv_gw_reselect() - force a gateway reselection * @bat_priv: the bat priv with all the soft interface information * * Set a flag to remind the GW component to perform a new gateway reselection. @@ -171,7 +185,7 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv) } /** - * batadv_gw_check_client_stop - check if client mode has been switched off + * batadv_gw_check_client_stop() - check if client mode has been switched off * @bat_priv: the bat priv with all the soft interface information * * This function assumes the caller has checked that the gw state *is actually @@ -202,6 +216,10 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv) batadv_gw_node_put(curr_gw); } +/** + * batadv_gw_election() - Elect the best gateway + * @bat_priv: the bat priv with all the soft interface information + */ void batadv_gw_election(struct batadv_priv *bat_priv) { struct batadv_gw_node *curr_gw = NULL; @@ -290,6 +308,11 @@ out: batadv_neigh_ifinfo_put(router_ifinfo); } +/** + * batadv_gw_check_election() - Elect orig node as best gateway when eligible + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be checked + */ void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { @@ -321,7 +344,7 @@ out: } /** - * batadv_gw_node_add - add gateway node to list of available gateways + * batadv_gw_node_add() - add gateway node to list of available gateways * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator announcing gateway capabilities * @gateway: announced bandwidth information @@ -364,7 +387,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, } /** - * batadv_gw_node_get - retrieve gateway node from list of available gateways + * batadv_gw_node_get() - retrieve gateway node from list of available gateways * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator announcing gateway capabilities * @@ -393,7 +416,7 @@ struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, } /** - * batadv_gw_node_update - update list of available gateways with changed + * batadv_gw_node_update() - update list of available gateways with changed * bandwidth information * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator announcing gateway capabilities @@ -458,6 +481,11 @@ out: batadv_gw_node_put(gw_node); } +/** + * batadv_gw_node_delete() - Remove orig_node from gateway list + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is currently in process of being removed + */ void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { @@ -469,6 +497,10 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, batadv_gw_node_update(bat_priv, orig_node, &gateway); } +/** + * batadv_gw_node_free() - Free gateway information from soft interface + * @bat_priv: the bat priv with all the soft interface information + */ void batadv_gw_node_free(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node; @@ -484,6 +516,14 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) } #ifdef CONFIG_BATMAN_ADV_DEBUGFS + +/** + * batadv_gw_client_seq_print_text() - Print the gateway table in a seq file + * @seq: seq file to print on + * @offset: not used + * + * Return: always 0 + */ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -514,7 +554,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) #endif /** - * batadv_gw_dump - Dump gateways into a message + * batadv_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @@ -567,7 +607,7 @@ out: } /** - * batadv_gw_dhcp_recipient_get - check if a packet is a DHCP message + * batadv_gw_dhcp_recipient_get() - check if a packet is a DHCP message * @skb: the packet to check * @header_len: a pointer to the batman-adv header size * @chaddr: buffer where the client address will be stored. Valid @@ -686,7 +726,8 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, } /** - * batadv_gw_out_of_range - check if the dhcp request destination is the best gw + * batadv_gw_out_of_range() - check if the dhcp request destination is the best + * gateway * @bat_priv: the bat priv with all the soft interface information * @skb: the outgoing packet * diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 3baa3d466e5e..981f58421a32 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: * * Marek Lindner diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 2c26039c23fc..b3e156af2256 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@ -26,15 +27,15 @@ #include <linux/netdevice.h> #include <linux/stddef.h> #include <linux/string.h> +#include <uapi/linux/batadv_packet.h> #include "gateway_client.h" #include "log.h" -#include "packet.h" #include "tvlv.h" /** - * batadv_parse_throughput - parse supplied string buffer to extract throughput - * information + * batadv_parse_throughput() - parse supplied string buffer to extract + * throughput information * @net_dev: the soft interface net device * @buff: string buffer to parse * @description: text shown when throughput string cannot be parsed @@ -100,8 +101,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff, } /** - * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download - * and upload bandwidth information + * batadv_parse_gw_bandwidth() - parse supplied string buffer to extract + * download and upload bandwidth information * @net_dev: the soft interface net device * @buff: string buffer to parse * @down: pointer holding the returned download bandwidth information @@ -136,8 +137,8 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, } /** - * batadv_gw_tvlv_container_update - update the gw tvlv container after gateway - * setting change + * batadv_gw_tvlv_container_update() - update the gw tvlv container after + * gateway setting change * @bat_priv: the bat priv with all the soft interface information */ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) @@ -164,6 +165,15 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) } } +/** + * batadv_gw_bandwidth_set() - Parse and set download/upload gateway bandwidth + * from supplied string buffer + * @net_dev: netdev struct of the soft interface + * @buff: the buffer containing the user data + * @count: number of bytes in the buffer + * + * Return: 'count' on success or a negative error code in case of failure + */ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count) { @@ -207,7 +217,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, } /** - * batadv_gw_tvlv_ogm_handler_v1 - process incoming gateway tvlv container + * batadv_gw_tvlv_ogm_handler_v1() - process incoming gateway tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) @@ -248,7 +258,7 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, } /** - * batadv_gw_init - initialise the gateway handling internals + * batadv_gw_init() - initialise the gateway handling internals * @bat_priv: the bat priv with all the soft interface information */ void batadv_gw_init(struct batadv_priv *bat_priv) @@ -264,7 +274,7 @@ void batadv_gw_init(struct batadv_priv *bat_priv) } /** - * batadv_gw_free - free the gateway handling internals + * batadv_gw_free() - free the gateway handling internals * @bat_priv: the bat priv with all the soft interface information */ void batadv_gw_free(struct batadv_priv *bat_priv) diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 0a6a97d201f2..afebd9c7edf4 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@ -32,11 +33,12 @@ enum batadv_gw_modes { /** * enum batadv_bandwidth_units - bandwidth unit types - * @BATADV_BW_UNIT_KBIT: unit type kbit - * @BATADV_BW_UNIT_MBIT: unit type mbit */ enum batadv_bandwidth_units { + /** @BATADV_BW_UNIT_KBIT: unit type kbit */ BATADV_BW_UNIT_KBIT, + + /** @BATADV_BW_UNIT_MBIT: unit type mbit */ BATADV_BW_UNIT_MBIT, }; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 4e3d5340ad96..5f186bff284a 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -22,7 +23,7 @@ #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if.h> #include <linux/if_arp.h> #include <linux/if_ether.h> @@ -37,6 +38,7 @@ #include <linux/spinlock.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> +#include <uapi/linux/batadv_packet.h> #include "bat_v.h" #include "bridge_loop_avoidance.h" @@ -45,14 +47,13 @@ #include "gateway_client.h" #include "log.h" #include "originator.h" -#include "packet.h" #include "send.h" #include "soft-interface.h" #include "sysfs.h" #include "translation-table.h" /** - * batadv_hardif_release - release hard interface from lists and queue for + * batadv_hardif_release() - release hard interface from lists and queue for * free after rcu grace period * @ref: kref pointer of the hard interface */ @@ -66,6 +67,12 @@ void batadv_hardif_release(struct kref *ref) kfree_rcu(hard_iface, rcu); } +/** + * batadv_hardif_get_by_netdev() - Get hard interface object of a net_device + * @net_dev: net_device to search for + * + * Return: batadv_hard_iface of net_dev (with increased refcnt), NULL on errors + */ struct batadv_hard_iface * batadv_hardif_get_by_netdev(const struct net_device *net_dev) { @@ -86,7 +93,7 @@ out: } /** - * batadv_getlink_net - return link net namespace (of use fallback) + * batadv_getlink_net() - return link net namespace (of use fallback) * @netdev: net_device to check * @fallback_net: return in case get_link_net is not available for @netdev * @@ -105,7 +112,7 @@ static struct net *batadv_getlink_net(const struct net_device *netdev, } /** - * batadv_mutual_parents - check if two devices are each others parent + * batadv_mutual_parents() - check if two devices are each others parent * @dev1: 1st net dev * @net1: 1st devices netns * @dev2: 2nd net dev @@ -138,7 +145,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1, } /** - * batadv_is_on_batman_iface - check if a device is a batman iface descendant + * batadv_is_on_batman_iface() - check if a device is a batman iface descendant * @net_dev: the device to check * * If the user creates any virtual device on top of a batman-adv interface, it @@ -202,7 +209,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev) } /** - * batadv_get_real_netdevice - check if the given netdev struct is a virtual + * batadv_get_real_netdevice() - check if the given netdev struct is a virtual * interface on top of another 'real' interface * @netdev: the device to check * @@ -246,7 +253,7 @@ out: } /** - * batadv_get_real_netdev - check if the given net_device struct is a virtual + * batadv_get_real_netdev() - check if the given net_device struct is a virtual * interface on top of another 'real' interface * @net_device: the device to check * @@ -265,7 +272,7 @@ struct net_device *batadv_get_real_netdev(struct net_device *net_device) } /** - * batadv_is_wext_netdev - check if the given net_device struct is a + * batadv_is_wext_netdev() - check if the given net_device struct is a * wext wifi interface * @net_device: the device to check * @@ -289,7 +296,7 @@ static bool batadv_is_wext_netdev(struct net_device *net_device) } /** - * batadv_is_cfg80211_netdev - check if the given net_device struct is a + * batadv_is_cfg80211_netdev() - check if the given net_device struct is a * cfg80211 wifi interface * @net_device: the device to check * @@ -309,7 +316,7 @@ static bool batadv_is_cfg80211_netdev(struct net_device *net_device) } /** - * batadv_wifi_flags_evaluate - calculate wifi flags for net_device + * batadv_wifi_flags_evaluate() - calculate wifi flags for net_device * @net_device: the device to check * * Return: batadv_hard_iface_wifi_flags flags of the device @@ -344,7 +351,7 @@ out: } /** - * batadv_is_cfg80211_hardif - check if the given hardif is a cfg80211 wifi + * batadv_is_cfg80211_hardif() - check if the given hardif is a cfg80211 wifi * interface * @hard_iface: the device to check * @@ -362,7 +369,7 @@ bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface) } /** - * batadv_is_wifi_hardif - check if the given hardif is a wifi interface + * batadv_is_wifi_hardif() - check if the given hardif is a wifi interface * @hard_iface: the device to check * * Return: true if the net device is a 802.11 wireless device, false otherwise. @@ -376,7 +383,7 @@ bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface) } /** - * batadv_hardif_no_broadcast - check whether (re)broadcast is necessary + * batadv_hardif_no_broadcast() - check whether (re)broadcast is necessary * @if_outgoing: the outgoing interface checked and considered for (re)broadcast * @orig_addr: the originator of this packet * @orig_neigh: originator address of the forwarder we just got the packet from @@ -560,6 +567,13 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) soft_iface->needed_tailroom = lower_tailroom; } +/** + * batadv_hardif_min_mtu() - Calculate maximum MTU for soft interface + * @soft_iface: netdev struct of the soft interface + * + * Return: MTU for the soft-interface (limited by the minimal MTU of all active + * slave interfaces) + */ int batadv_hardif_min_mtu(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); @@ -606,7 +620,11 @@ out: return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN); } -/* adjusts the MTU if a new interface with a smaller MTU appeared. */ +/** + * batadv_update_min_mtu() - Adjusts the MTU if a new interface with a smaller + * MTU appeared + * @soft_iface: netdev struct of the soft interface + */ void batadv_update_min_mtu(struct net_device *soft_iface) { soft_iface->mtu = batadv_hardif_min_mtu(soft_iface); @@ -667,7 +685,7 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) } /** - * batadv_master_del_slave - remove hard_iface from the current master interface + * batadv_master_del_slave() - remove hard_iface from the current master iface * @slave: the interface enslaved in another master * @master: the master from which slave has to be removed * @@ -691,6 +709,14 @@ static int batadv_master_del_slave(struct batadv_hard_iface *slave, return ret; } +/** + * batadv_hardif_enable_interface() - Enslave hard interface to soft interface + * @hard_iface: hard interface to add to soft interface + * @net: the applicable net namespace + * @iface_name: name of the soft interface + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, struct net *net, const char *iface_name) { @@ -802,6 +828,12 @@ err: return ret; } +/** + * batadv_hardif_disable_interface() - Remove hard interface from soft interface + * @hard_iface: hard interface to be removed + * @autodel: whether to delete soft interface when it doesn't contain any other + * slave interfaces + */ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, enum batadv_hard_if_cleanup autodel) { @@ -936,6 +968,9 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) batadv_hardif_put(hard_iface); } +/** + * batadv_hardif_remove_interfaces() - Remove all hard interfaces + */ void batadv_hardif_remove_interfaces(void) { struct batadv_hard_iface *hard_iface, *hard_iface_tmp; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 9f9890ff7a22..de5e9a374ece 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -30,36 +31,74 @@ struct net_device; struct net; +/** + * enum batadv_hard_if_state - State of a hard interface + */ enum batadv_hard_if_state { + /** + * @BATADV_IF_NOT_IN_USE: interface is not used as slave interface of a + * batman-adv soft interface + */ BATADV_IF_NOT_IN_USE, + + /** + * @BATADV_IF_TO_BE_REMOVED: interface will be removed from soft + * interface + */ BATADV_IF_TO_BE_REMOVED, + + /** @BATADV_IF_INACTIVE: interface is deactivated */ BATADV_IF_INACTIVE, + + /** @BATADV_IF_ACTIVE: interface is used */ BATADV_IF_ACTIVE, + + /** @BATADV_IF_TO_BE_ACTIVATED: interface is getting activated */ BATADV_IF_TO_BE_ACTIVATED, + + /** + * @BATADV_IF_I_WANT_YOU: interface is queued up (using sysfs) for being + * added as slave interface of a batman-adv soft interface + */ BATADV_IF_I_WANT_YOU, }; /** * enum batadv_hard_if_bcast - broadcast avoidance options - * @BATADV_HARDIF_BCAST_OK: Do broadcast on according hard interface - * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no recipient - * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it from - * @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator */ enum batadv_hard_if_bcast { + /** @BATADV_HARDIF_BCAST_OK: Do broadcast on according hard interface */ BATADV_HARDIF_BCAST_OK = 0, + + /** + * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no + * recipient + */ BATADV_HARDIF_BCAST_NORECIPIENT, + + /** + * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it + * from + */ BATADV_HARDIF_BCAST_DUPFWD, + + /** @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator */ BATADV_HARDIF_BCAST_DUPORIG, }; /** * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal - * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface - * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed */ enum batadv_hard_if_cleanup { + /** + * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface + */ BATADV_IF_CLEANUP_KEEP, + + /** + * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was + * removed + */ BATADV_IF_CLEANUP_AUTO, }; @@ -82,7 +121,7 @@ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, u8 *orig_addr, u8 *orig_neigh); /** - * batadv_hardif_put - decrement the hard interface refcounter and possibly + * batadv_hardif_put() - decrement the hard interface refcounter and possibly * release it * @hard_iface: the hard interface to free */ @@ -91,6 +130,12 @@ static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface) kref_put(&hard_iface->refcount, batadv_hardif_release); } +/** + * batadv_primary_if_get_selected() - Get reference to primary interface + * @bat_priv: the bat priv with all the soft interface information + * + * Return: primary interface (with increased refcnt), otherwise NULL + */ static inline struct batadv_hard_iface * batadv_primary_if_get_selected(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index b5f7e13918ac..04d964358c98 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner @@ -18,7 +19,7 @@ #include "hash.h" #include "main.h" -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/lockdep.h> #include <linux/slab.h> @@ -33,7 +34,10 @@ static void batadv_hash_init(struct batadv_hashtable *hash) } } -/* free only the hashtable and the hash itself. */ +/** + * batadv_hash_destroy() - Free only the hashtable and the hash itself + * @hash: hash object to destroy + */ void batadv_hash_destroy(struct batadv_hashtable *hash) { kfree(hash->list_locks); @@ -41,7 +45,12 @@ void batadv_hash_destroy(struct batadv_hashtable *hash) kfree(hash); } -/* allocates and clears the hash */ +/** + * batadv_hash_new() - Allocates and clears the hashtable + * @size: number of hash buckets to allocate + * + * Return: newly allocated hashtable, NULL on errors + */ struct batadv_hashtable *batadv_hash_new(u32 size) { struct batadv_hashtable *hash; @@ -70,6 +79,11 @@ free_hash: return NULL; } +/** + * batadv_hash_set_lock_class() - Set specific lockdep class for hash spinlocks + * @hash: hash object to modify + * @key: lockdep class key address + */ void batadv_hash_set_lock_class(struct batadv_hashtable *hash, struct lock_class_key *key) { diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 0c905e91c5e2..4ce1b6d3ad5c 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner @@ -45,10 +46,18 @@ typedef bool (*batadv_hashdata_compare_cb)(const struct hlist_node *, typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32); typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *); +/** + * struct batadv_hashtable - Wrapper of simple hlist based hashtable + */ struct batadv_hashtable { - struct hlist_head *table; /* the hashtable itself with the buckets */ - spinlock_t *list_locks; /* spinlock for each hash list entry */ - u32 size; /* size of hashtable */ + /** @table: the hashtable itself with the buckets */ + struct hlist_head *table; + + /** @list_locks: spinlock for each hash list entry */ + spinlock_t *list_locks; + + /** @size: size of hashtable */ + u32 size; }; /* allocates and clears the hash */ @@ -62,7 +71,7 @@ void batadv_hash_set_lock_class(struct batadv_hashtable *hash, void batadv_hash_destroy(struct batadv_hashtable *hash); /** - * batadv_hash_add - adds data to the hashtable + * batadv_hash_add() - adds data to the hashtable * @hash: storage hash table * @compare: callback to determine if 2 hash elements are identical * @choose: callback calculating the hash index @@ -112,8 +121,15 @@ out: return ret; } -/* removes data from hash, if found. data could be the structure you use with - * just the key filled, we just need the key for comparing. +/** + * batadv_hash_remove() - Removes data from hash, if found + * @hash: hash table + * @compare: callback to determine if 2 hash elements are identical + * @choose: callback calculating the hash index + * @data: data passed to the aforementioned callbacks as argument + * + * ata could be the structure you use with just the key filled, we just need + * the key for comparing. * * Return: returns pointer do data on success, so you can remove the used * structure yourself, or NULL on error diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index a98e0a986cef..581375d0eed2 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@ -26,6 +27,7 @@ #include <linux/export.h> #include <linux/fcntl.h> #include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/kernel.h> #include <linux/list.h> @@ -42,11 +44,11 @@ #include <linux/string.h> #include <linux/uaccess.h> #include <linux/wait.h> +#include <uapi/linux/batadv_packet.h> #include "hard-interface.h" #include "log.h" #include "originator.h" -#include "packet.h" #include "send.h" static struct batadv_socket_client *batadv_socket_client_hash[256]; @@ -55,6 +57,9 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client, struct batadv_icmp_header *icmph, size_t icmp_len); +/** + * batadv_socket_init() - Initialize soft interface independent socket data + */ void batadv_socket_init(void) { memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash)); @@ -314,6 +319,12 @@ static const struct file_operations batadv_fops = { .llseek = no_llseek, }; +/** + * batadv_socket_setup() - Create debugfs "socket" file + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_socket_setup(struct batadv_priv *bat_priv) { struct dentry *d; @@ -333,7 +344,7 @@ err: } /** - * batadv_socket_add_packet - schedule an icmp packet to be sent to + * batadv_socket_add_packet() - schedule an icmp packet to be sent to * userspace on an icmp socket. * @socket_client: the socket this packet belongs to * @icmph: pointer to the header of the icmp packet @@ -390,7 +401,7 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client, } /** - * batadv_socket_receive_packet - schedule an icmp packet to be received + * batadv_socket_receive_packet() - schedule an icmp packet to be received * locally and sent to userspace. * @icmph: pointer to the header of the icmp packet * @icmp_len: total length of the icmp packet diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index f3fec40aae86..84cddd01eeab 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index 76451460c98d..9be74a44e99d 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@ -24,6 +25,7 @@ #include <linux/export.h> #include <linux/fcntl.h> #include <linux/fs.h> +#include <linux/gfp.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/module.h> @@ -86,6 +88,13 @@ static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log, return 0; } +/** + * batadv_debug_log() - Add debug log entry + * @bat_priv: the bat priv with all the soft interface information + * @fmt: format string + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) { va_list args; @@ -197,6 +206,12 @@ static const struct file_operations batadv_log_fops = { .llseek = no_llseek, }; +/** + * batadv_debug_log_setup() - Initialize debug log + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_debug_log_setup(struct batadv_priv *bat_priv) { struct dentry *d; @@ -222,6 +237,10 @@ err: return -ENOMEM; } +/** + * batadv_debug_log_cleanup() - Destroy debug log + * @bat_priv: the bat priv with all the soft interface information + */ void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) { kfree(bat_priv->debug_log); diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 65ce97efa6b5..35e02b2b9e72 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -44,25 +45,33 @@ static inline void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) /** * enum batadv_dbg_level - available log levels - * @BATADV_DBG_BATMAN: OGM and TQ computations related messages - * @BATADV_DBG_ROUTES: route added / changed / deleted - * @BATADV_DBG_TT: translation table messages - * @BATADV_DBG_BLA: bridge loop avoidance messages - * @BATADV_DBG_DAT: ARP snooping and DAT related messages - * @BATADV_DBG_NC: network coding related messages - * @BATADV_DBG_MCAST: multicast related messages - * @BATADV_DBG_TP_METER: throughput meter messages - * @BATADV_DBG_ALL: the union of all the above log levels */ enum batadv_dbg_level { + /** @BATADV_DBG_BATMAN: OGM and TQ computations related messages */ BATADV_DBG_BATMAN = BIT(0), + + /** @BATADV_DBG_ROUTES: route added / changed / deleted */ BATADV_DBG_ROUTES = BIT(1), + + /** @BATADV_DBG_TT: translation table messages */ BATADV_DBG_TT = BIT(2), + + /** @BATADV_DBG_BLA: bridge loop avoidance messages */ BATADV_DBG_BLA = BIT(3), + + /** @BATADV_DBG_DAT: ARP snooping and DAT related messages */ BATADV_DBG_DAT = BIT(4), + + /** @BATADV_DBG_NC: network coding related messages */ BATADV_DBG_NC = BIT(5), + + /** @BATADV_DBG_MCAST: multicast related messages */ BATADV_DBG_MCAST = BIT(6), + + /** @BATADV_DBG_TP_METER: throughput meter messages */ BATADV_DBG_TP_METER = BIT(7), + + /** @BATADV_DBG_ALL: the union of all the above log levels */ BATADV_DBG_ALL = 255, }; @@ -70,7 +79,14 @@ enum batadv_dbg_level { int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) __printf(2, 3); -/* possibly ratelimited debug output */ +/** + * _batadv_dbg() - Store debug output with(out) ratelimiting + * @type: type of debug message + * @bat_priv: the bat priv with all the soft interface information + * @ratelimited: whether output should be rate limited + * @fmt: format string + * @arg...: variable arguments + */ #define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ do { \ struct batadv_priv *__batpriv = (bat_priv); \ @@ -89,11 +105,30 @@ static inline void _batadv_dbg(int type __always_unused, } #endif +/** + * batadv_dbg() - Store debug output without ratelimiting + * @type: type of debug message + * @bat_priv: the bat priv with all the soft interface information + * @arg...: format string and variable arguments + */ #define batadv_dbg(type, bat_priv, arg...) \ _batadv_dbg(type, bat_priv, 0, ## arg) + +/** + * batadv_dbg_ratelimited() - Store debug output with ratelimiting + * @type: type of debug message + * @bat_priv: the bat priv with all the soft interface information + * @arg...: format string and variable arguments + */ #define batadv_dbg_ratelimited(type, bat_priv, arg...) \ _batadv_dbg(type, bat_priv, 1, ## arg) +/** + * batadv_info() - Store message in debug buffer and print it to kmsg buffer + * @net_dev: the soft interface net device + * @fmt: format string + * @arg...: variable arguments + */ #define batadv_info(net_dev, fmt, arg...) \ do { \ struct net_device *_netdev = (net_dev); \ @@ -101,6 +136,13 @@ static inline void _batadv_dbg(int type __always_unused, batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \ pr_info("%s: " fmt, _netdev->name, ## arg); \ } while (0) + +/** + * batadv_err() - Store error in debug buffer and print it to kmsg buffer + * @net_dev: the soft interface net device + * @fmt: format string + * @arg...: variable arguments + */ #define batadv_err(net_dev, fmt, arg...) \ do { \ struct net_device *_netdev = (net_dev); \ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 4daed7ad46f2..d31c8266e244 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -18,12 +19,12 @@ #include "main.h" #include <linux/atomic.h> -#include <linux/bug.h> +#include <linux/build_bug.h> #include <linux/byteorder/generic.h> #include <linux/crc32c.h> #include <linux/errno.h> -#include <linux/fs.h> #include <linux/genetlink.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/init.h> @@ -45,6 +46,7 @@ #include <linux/workqueue.h> #include <net/dsfield.h> #include <net/rtnetlink.h> +#include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" @@ -62,7 +64,6 @@ #include "netlink.h" #include "network-coding.h" #include "originator.h" -#include "packet.h" #include "routing.h" #include "send.h" #include "soft-interface.h" @@ -139,6 +140,12 @@ static void __exit batadv_exit(void) batadv_tt_cache_destroy(); } +/** + * batadv_mesh_init() - Initialize soft interface + * @soft_iface: netdev struct of the soft interface + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_mesh_init(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); @@ -216,6 +223,10 @@ err: return ret; } +/** + * batadv_mesh_free() - Deinitialize soft interface + * @soft_iface: netdev struct of the soft interface + */ void batadv_mesh_free(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); @@ -255,8 +266,8 @@ void batadv_mesh_free(struct net_device *soft_iface) } /** - * batadv_is_my_mac - check if the given mac address belongs to any of the real - * interfaces in the current mesh + * batadv_is_my_mac() - check if the given mac address belongs to any of the + * real interfaces in the current mesh * @bat_priv: the bat priv with all the soft interface information * @addr: the address to check * @@ -286,7 +297,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_seq_print_text_primary_if_get - called from debugfs table printing + * batadv_seq_print_text_primary_if_get() - called from debugfs table printing * function that requires the primary interface * @seq: debugfs table seq_file struct * @@ -323,7 +334,7 @@ out: #endif /** - * batadv_max_header_len - calculate maximum encapsulation overhead for a + * batadv_max_header_len() - calculate maximum encapsulation overhead for a * payload packet * * Return: the maximum encapsulation overhead in bytes. @@ -348,7 +359,7 @@ int batadv_max_header_len(void) } /** - * batadv_skb_set_priority - sets skb priority according to packet content + * batadv_skb_set_priority() - sets skb priority according to packet content * @skb: the packet to be sent * @offset: offset to the packet content * @@ -412,6 +423,16 @@ static int batadv_recv_unhandled_packet(struct sk_buff *skb, /* incoming packets with the batman ethertype received on any active hard * interface */ + +/** + * batadv_batman_skb_recv() - Handle incoming message from an hard interface + * @skb: the received packet + * @dev: the net device that the packet was received on + * @ptype: packet type of incoming packet (ETH_P_BATMAN) + * @orig_dev: the original receive net device (e.g. bonded device) + * + * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure + */ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) @@ -535,6 +556,13 @@ static void batadv_recv_handler_init(void) batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_frag_packet; } +/** + * batadv_recv_handler_register() - Register handler for batman-adv packet type + * @packet_type: batadv_packettype which should be handled + * @recv_handler: receive handler for the packet type + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_recv_handler_register(u8 packet_type, int (*recv_handler)(struct sk_buff *, @@ -552,13 +580,17 @@ batadv_recv_handler_register(u8 packet_type, return 0; } +/** + * batadv_recv_handler_unregister() - Unregister handler for packet type + * @packet_type: batadv_packettype which should no longer be handled + */ void batadv_recv_handler_unregister(u8 packet_type) { batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet; } /** - * batadv_skb_crc32 - calculate CRC32 of the whole packet and skip bytes in + * batadv_skb_crc32() - calculate CRC32 of the whole packet and skip bytes in * the header * @skb: skb pointing to fragmented socket buffers * @payload_ptr: Pointer to position inside the head buffer of the skb @@ -591,7 +623,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) } /** - * batadv_get_vid - extract the VLAN identifier from skb if any + * batadv_get_vid() - extract the VLAN identifier from skb if any * @skb: the buffer containing the packet * @header_len: length of the batman header preceding the ethernet header * @@ -618,7 +650,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) } /** - * batadv_vlan_ap_isola_get - return the AP isolation status for the given vlan + * batadv_vlan_ap_isola_get() - return AP isolation status for the given vlan * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier for which the AP isolation attributed as to be * looked up diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index edb2f239d04d..f7ba3f96d8f3 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -24,7 +25,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2017.4" +#define BATADV_SOURCE_VERSION "2018.0" #endif /* B.A.T.M.A.N. parameters */ @@ -140,24 +141,56 @@ */ #define BATADV_TP_MAX_NUM 5 +/** + * enum batadv_mesh_state - State of a soft interface + */ enum batadv_mesh_state { + /** @BATADV_MESH_INACTIVE: soft interface is not yet running */ BATADV_MESH_INACTIVE, + + /** @BATADV_MESH_ACTIVE: interface is up and running */ BATADV_MESH_ACTIVE, + + /** @BATADV_MESH_DEACTIVATING: interface is getting shut down */ BATADV_MESH_DEACTIVATING, }; #define BATADV_BCAST_QUEUE_LEN 256 #define BATADV_BATMAN_QUEUE_LEN 256 +/** + * enum batadv_uev_action - action type of uevent + */ enum batadv_uev_action { + /** @BATADV_UEV_ADD: gateway was selected (after none was selected) */ BATADV_UEV_ADD = 0, + + /** + * @BATADV_UEV_DEL: selected gateway was removed and none is selected + * anymore + */ BATADV_UEV_DEL, + + /** + * @BATADV_UEV_CHANGE: a different gateway was selected as based gateway + */ BATADV_UEV_CHANGE, + + /** + * @BATADV_UEV_LOOPDETECT: loop was detected which cannot be handled by + * bridge loop avoidance + */ BATADV_UEV_LOOPDETECT, }; +/** + * enum batadv_uev_type - Type of uevent + */ enum batadv_uev_type { + /** @BATADV_UEV_GW: selected gateway was modified */ BATADV_UEV_GW = 0, + + /** @BATADV_UEV_BLA: bridge loop avoidance event */ BATADV_UEV_BLA, }; @@ -184,16 +217,14 @@ enum batadv_uev_type { /* Kernel headers */ -#include <linux/bitops.h> /* for packet.h */ #include <linux/compiler.h> #include <linux/etherdevice.h> -#include <linux/if_ether.h> /* for packet.h */ #include <linux/if_vlan.h> #include <linux/jiffies.h> #include <linux/percpu.h> #include <linux/types.h> +#include <uapi/linux/batadv_packet.h> -#include "packet.h" #include "types.h" struct net_device; @@ -202,7 +233,7 @@ struct seq_file; struct sk_buff; /** - * batadv_print_vid - return printable version of vid information + * batadv_print_vid() - return printable version of vid information * @vid: the VLAN identifier * * Return: -1 when no VLAN is used, VLAN id otherwise @@ -238,7 +269,7 @@ void batadv_recv_handler_unregister(u8 packet_type); __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr); /** - * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses + * batadv_compare_eth() - Compare two not u16 aligned Ethernet addresses * @data1: Pointer to a six-byte array containing the Ethernet address * @data2: Pointer other six-byte array containing the Ethernet address * @@ -252,7 +283,7 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2) } /** - * batadv_has_timed_out - compares current time (jiffies) and timestamp + + * batadv_has_timed_out() - compares current time (jiffies) and timestamp + * timeout * @timestamp: base value to compare with (in jiffies) * @timeout: added to base value before comparing (in milliseconds) @@ -265,40 +296,96 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, return time_is_before_jiffies(timestamp + msecs_to_jiffies(timeout)); } +/** + * batadv_atomic_dec_not_zero() - Decrease unless the number is 0 + * @v: pointer of type atomic_t + * + * Return: non-zero if v was not 0, and zero otherwise. + */ #define batadv_atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) -/* Returns the smallest signed integer in two's complement with the sizeof x */ +/** + * batadv_smallest_signed_int() - Returns the smallest signed integer in two's + * complement with the sizeof x + * @x: type of integer + * + * Return: smallest signed integer of type + */ #define batadv_smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u))) -/* Checks if a sequence number x is a predecessor/successor of y. - * they handle overflows/underflows and can correctly check for a - * predecessor/successor unless the variable sequence number has grown by - * more then 2**(bitwidth(x)-1)-1. +/** + * batadv_seq_before() - Checks if a sequence number x is a predecessor of y + * @x: potential predecessor of @y + * @y: value to compare @x against + * + * It handles overflows/underflows and can correctly check for a predecessor + * unless the variable sequence number has grown by more then + * 2**(bitwidth(x)-1)-1. + * * This means that for a u8 with the maximum value 255, it would think: - * - when adding nothing - it is neither a predecessor nor a successor - * - before adding more than 127 to the starting value - it is a predecessor, - * - when adding 128 - it is neither a predecessor nor a successor, - * - after adding more than 127 to the starting value - it is a successor + * + * * when adding nothing - it is neither a predecessor nor a successor + * * before adding more than 127 to the starting value - it is a predecessor, + * * when adding 128 - it is neither a predecessor nor a successor, + * * after adding more than 127 to the starting value - it is a successor + * + * Return: true when x is a predecessor of y, false otherwise */ #define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \ typeof(y)_d2 = (y); \ typeof(x)_dummy = (_d1 - _d2); \ (void)(&_d1 == &_d2); \ _dummy > batadv_smallest_signed_int(_dummy); }) + +/** + * batadv_seq_after() - Checks if a sequence number x is a successor of y + * @x: potential sucessor of @y + * @y: value to compare @x against + * + * It handles overflows/underflows and can correctly check for a successor + * unless the variable sequence number has grown by more then + * 2**(bitwidth(x)-1)-1. + * + * This means that for a u8 with the maximum value 255, it would think: + * + * * when adding nothing - it is neither a predecessor nor a successor + * * before adding more than 127 to the starting value - it is a predecessor, + * * when adding 128 - it is neither a predecessor nor a successor, + * * after adding more than 127 to the starting value - it is a successor + * + * Return: true when x is a successor of y, false otherwise + */ #define batadv_seq_after(x, y) batadv_seq_before(y, x) -/* Stop preemption on local cpu while incrementing the counter */ +/** + * batadv_add_counter() - Add to per cpu statistics counter of soft interface + * @bat_priv: the bat priv with all the soft interface information + * @idx: counter index which should be modified + * @count: value to increase counter by + * + * Stop preemption on local cpu while incrementing the counter + */ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx, size_t count) { this_cpu_add(bat_priv->bat_counters[idx], count); } +/** + * batadv_inc_counter() - Increase per cpu statistics counter of soft interface + * @b: the bat priv with all the soft interface information + * @i: counter index which should be modified + */ #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) -/* Define a macro to reach the control buffer of the skb. The members of the - * control buffer are defined in struct batadv_skb_cb in types.h. - * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h. +/** + * BATADV_SKB_CB() - Get batadv_skb_cb from skb control buffer + * @__skb: skb holding the control buffer + * + * The members of the control buffer are defined in struct batadv_skb_cb in + * types.h. The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h. + * + * Return: pointer to the batadv_skb_cb of the skb */ #define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0])) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index e553a8770a89..cbdeb47ec3f6 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors: * * Linus Lüssing @@ -24,7 +25,7 @@ #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/icmpv6.h> #include <linux/if_bridge.h> #include <linux/if_ether.h> @@ -54,18 +55,18 @@ #include <net/if_inet6.h> #include <net/ip.h> #include <net/ipv6.h> +#include <uapi/linux/batadv_packet.h> #include "hard-interface.h" #include "hash.h" #include "log.h" -#include "packet.h" #include "translation-table.h" #include "tvlv.h" static void batadv_mcast_mla_update(struct work_struct *work); /** - * batadv_mcast_start_timer - schedule the multicast periodic worker + * batadv_mcast_start_timer() - schedule the multicast periodic worker * @bat_priv: the bat priv with all the soft interface information */ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) @@ -75,7 +76,7 @@ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists + * batadv_mcast_get_bridge() - get the bridge on top of the softif if it exists * @soft_iface: netdev struct of the mesh interface * * If the given soft interface has a bridge on top then the refcount @@ -101,7 +102,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) } /** - * batadv_mcast_mla_softif_get - get softif multicast listeners + * batadv_mcast_mla_softif_get() - get softif multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @@ -147,7 +148,7 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev, } /** - * batadv_mcast_mla_is_duplicate - check whether an address is in a list + * batadv_mcast_mla_is_duplicate() - check whether an address is in a list * @mcast_addr: the multicast address to check * @mcast_list: the list with multicast addresses to search in * @@ -167,7 +168,7 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, } /** - * batadv_mcast_mla_br_addr_cpy - copy a bridge multicast address + * batadv_mcast_mla_br_addr_cpy() - copy a bridge multicast address * @dst: destination to write to - a multicast MAC address * @src: source to read from - a multicast IP address * @@ -191,7 +192,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src) } /** - * batadv_mcast_mla_bridge_get - get bridged-in multicast listeners + * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners * @dev: a bridge slave whose bridge to collect multicast addresses from * @mcast_list: a list to put found addresses into * @@ -244,7 +245,7 @@ out: } /** - * batadv_mcast_mla_list_free - free a list of multicast addresses + * batadv_mcast_mla_list_free() - free a list of multicast addresses * @mcast_list: the list to free * * Removes and frees all items in the given mcast_list. @@ -261,7 +262,7 @@ static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) } /** - * batadv_mcast_mla_tt_retract - clean up multicast listener announcements + * batadv_mcast_mla_tt_retract() - clean up multicast listener announcements * @bat_priv: the bat priv with all the soft interface information * @mcast_list: a list of addresses which should _not_ be removed * @@ -297,7 +298,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, } /** - * batadv_mcast_mla_tt_add - add multicast listener announcements + * batadv_mcast_mla_tt_add() - add multicast listener announcements * @bat_priv: the bat priv with all the soft interface information * @mcast_list: a list of addresses which are going to get added * @@ -333,7 +334,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, } /** - * batadv_mcast_has_bridge - check whether the soft-iface is bridged + * batadv_mcast_has_bridge() - check whether the soft-iface is bridged * @bat_priv: the bat priv with all the soft interface information * * Checks whether there is a bridge on top of our soft interface. @@ -354,7 +355,8 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv) } /** - * batadv_mcast_querier_log - debug output regarding the querier status on link + * batadv_mcast_querier_log() - debug output regarding the querier status on + * link * @bat_priv: the bat priv with all the soft interface information * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD") * @old_state: the previous querier state on our link @@ -405,7 +407,8 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, } /** - * batadv_mcast_bridge_log - debug output for topology changes in bridged setups + * batadv_mcast_bridge_log() - debug output for topology changes in bridged + * setups * @bat_priv: the bat priv with all the soft interface information * @bridged: a flag about whether the soft interface is currently bridged or not * @querier_ipv4: (maybe) new status of a potential, selected IGMP querier @@ -444,7 +447,7 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv, bool bridged, } /** - * batadv_mcast_flags_logs - output debug information about mcast flag changes + * batadv_mcast_flags_logs() - output debug information about mcast flag changes * @bat_priv: the bat priv with all the soft interface information * @flags: flags indicating the new multicast state * @@ -470,7 +473,7 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) } /** - * batadv_mcast_mla_tvlv_update - update multicast tvlv + * batadv_mcast_mla_tvlv_update() - update multicast tvlv * @bat_priv: the bat priv with all the soft interface information * * Updates the own multicast tvlv with our current multicast related settings, @@ -545,7 +548,7 @@ update: } /** - * __batadv_mcast_mla_update - update the own MLAs + * __batadv_mcast_mla_update() - update the own MLAs * @bat_priv: the bat priv with all the soft interface information * * Updates the own multicast listener announcements in the translation @@ -582,7 +585,7 @@ out: } /** - * batadv_mcast_mla_update - update the own MLAs + * batadv_mcast_mla_update() - update the own MLAs * @work: kernel work struct * * Updates the own multicast listener announcements in the translation @@ -605,7 +608,7 @@ static void batadv_mcast_mla_update(struct work_struct *work) } /** - * batadv_mcast_is_report_ipv4 - check for IGMP reports + * batadv_mcast_is_report_ipv4() - check for IGMP reports * @skb: the ethernet frame destined for the mesh * * This call might reallocate skb data. @@ -630,7 +633,8 @@ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb) } /** - * batadv_mcast_forw_mode_check_ipv4 - check for optimized forwarding potential + * batadv_mcast_forw_mode_check_ipv4() - check for optimized forwarding + * potential * @bat_priv: the bat priv with all the soft interface information * @skb: the IPv4 packet to check * @is_unsnoopable: stores whether the destination is snoopable @@ -671,7 +675,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, } /** - * batadv_mcast_is_report_ipv6 - check for MLD reports + * batadv_mcast_is_report_ipv6() - check for MLD reports * @skb: the ethernet frame destined for the mesh * * This call might reallocate skb data. @@ -695,7 +699,8 @@ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb) } /** - * batadv_mcast_forw_mode_check_ipv6 - check for optimized forwarding potential + * batadv_mcast_forw_mode_check_ipv6() - check for optimized forwarding + * potential * @bat_priv: the bat priv with all the soft interface information * @skb: the IPv6 packet to check * @is_unsnoopable: stores whether the destination is snoopable @@ -736,7 +741,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, } /** - * batadv_mcast_forw_mode_check - check for optimized forwarding potential + * batadv_mcast_forw_mode_check() - check for optimized forwarding potential * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast frame to check * @is_unsnoopable: stores whether the destination is snoopable @@ -774,7 +779,7 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, } /** - * batadv_mcast_forw_want_all_ip_count - count nodes with unspecific mcast + * batadv_mcast_forw_want_all_ip_count() - count nodes with unspecific mcast * interest * @bat_priv: the bat priv with all the soft interface information * @ethhdr: ethernet header of a packet @@ -798,7 +803,7 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv, } /** - * batadv_mcast_forw_tt_node_get - get a multicast tt node + * batadv_mcast_forw_tt_node_get() - get a multicast tt node * @bat_priv: the bat priv with all the soft interface information * @ethhdr: the ether header containing the multicast destination * @@ -814,7 +819,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv, } /** - * batadv_mcast_forw_ipv4_node_get - get a node with an ipv4 flag + * batadv_mcast_forw_ipv4_node_get() - get a node with an ipv4 flag * @bat_priv: the bat priv with all the soft interface information * * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and @@ -841,7 +846,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv) } /** - * batadv_mcast_forw_ipv6_node_get - get a node with an ipv6 flag + * batadv_mcast_forw_ipv6_node_get() - get a node with an ipv6 flag * @bat_priv: the bat priv with all the soft interface information * * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set @@ -868,7 +873,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv) } /** - * batadv_mcast_forw_ip_node_get - get a node with an ipv4/ipv6 flag + * batadv_mcast_forw_ip_node_get() - get a node with an ipv4/ipv6 flag * @bat_priv: the bat priv with all the soft interface information * @ethhdr: an ethernet header to determine the protocol family from * @@ -892,7 +897,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv, } /** - * batadv_mcast_forw_unsnoop_node_get - get a node with an unsnoopable flag + * batadv_mcast_forw_unsnoop_node_get() - get a node with an unsnoopable flag * @bat_priv: the bat priv with all the soft interface information * * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag @@ -919,7 +924,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv) } /** - * batadv_mcast_forw_mode - check on how to forward a multicast packet + * batadv_mcast_forw_mode() - check on how to forward a multicast packet * @bat_priv: the bat priv with all the soft interface information * @skb: The multicast packet to check * @orig: an originator to be set to forward the skb to @@ -973,7 +978,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, } /** - * batadv_mcast_want_unsnoop_update - update unsnoop counter and list + * batadv_mcast_want_unsnoop_update() - update unsnoop counter and list * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state @@ -1018,7 +1023,7 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, } /** - * batadv_mcast_want_ipv4_update - update want-all-ipv4 counter and list + * batadv_mcast_want_ipv4_update() - update want-all-ipv4 counter and list * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state @@ -1063,7 +1068,7 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, } /** - * batadv_mcast_want_ipv6_update - update want-all-ipv6 counter and list + * batadv_mcast_want_ipv6_update() - update want-all-ipv6 counter and list * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state @@ -1108,7 +1113,7 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, } /** - * batadv_mcast_tvlv_ogm_handler - process incoming multicast tvlv container + * batadv_mcast_tvlv_ogm_handler() - process incoming multicast tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) @@ -1164,7 +1169,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, } /** - * batadv_mcast_init - initialize the multicast optimizations structures + * batadv_mcast_init() - initialize the multicast optimizations structures * @bat_priv: the bat priv with all the soft interface information */ void batadv_mcast_init(struct batadv_priv *bat_priv) @@ -1179,7 +1184,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_mcast_flags_print_header - print own mcast flags to debugfs table + * batadv_mcast_flags_print_header() - print own mcast flags to debugfs table * @bat_priv: the bat priv with all the soft interface information * @seq: debugfs table seq_file struct * @@ -1220,7 +1225,7 @@ static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv, } /** - * batadv_mcast_flags_seq_print_text - print the mcast flags of other nodes + * batadv_mcast_flags_seq_print_text() - print the mcast flags of other nodes * @seq: seq file to print on * @offset: not used * @@ -1281,7 +1286,7 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) #endif /** - * batadv_mcast_free - free the multicast optimizations structures + * batadv_mcast_free() - free the multicast optimizations structures * @bat_priv: the bat priv with all the soft interface information */ void batadv_mcast_free(struct batadv_priv *bat_priv) @@ -1296,7 +1301,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv) } /** - * batadv_mcast_purge_orig - reset originator global mcast state modifications + * batadv_mcast_purge_orig() - reset originator global mcast state modifications * @orig: the originator which is going to get purged */ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 2a78cddab0e9..3ac06337ab71 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors: * * Linus Lüssing @@ -25,15 +26,21 @@ struct sk_buff; /** * enum batadv_forw_mode - the way a packet should be forwarded as - * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic - * flooding) - * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the - * BATMAN unicast routing protocol) - * @BATADV_FORW_NONE: don't forward, drop it */ enum batadv_forw_mode { + /** + * @BATADV_FORW_ALL: forward the packet to all nodes (currently via + * classic flooding) + */ BATADV_FORW_ALL, + + /** + * @BATADV_FORW_SINGLE: forward the packet to a single node (currently + * via the BATMAN unicast routing protocol) + */ BATADV_FORW_SINGLE, + + /** @BATADV_FORW_NONE: don't forward, drop it */ BATADV_FORW_NONE, }; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index ab13b4d58733..a823d3899bad 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors: * * Matthias Schiffer @@ -23,8 +24,8 @@ #include <linux/cache.h> #include <linux/errno.h> #include <linux/export.h> -#include <linux/fs.h> #include <linux/genetlink.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/kernel.h> @@ -39,6 +40,7 @@ #include <net/genetlink.h> #include <net/netlink.h> #include <net/sock.h> +#include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" @@ -46,7 +48,6 @@ #include "gateway_client.h" #include "hard-interface.h" #include "originator.h" -#include "packet.h" #include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" @@ -99,7 +100,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { }; /** - * batadv_netlink_get_ifindex - Extract an interface index from a message + * batadv_netlink_get_ifindex() - Extract an interface index from a message * @nlh: Message header * @attrtype: Attribute which holds an interface index * @@ -114,7 +115,7 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) } /** - * batadv_netlink_mesh_info_put - fill in generic information about mesh + * batadv_netlink_mesh_info_put() - fill in generic information about mesh * interface * @msg: netlink message to be sent back * @soft_iface: interface for which the data should be taken @@ -169,7 +170,7 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) } /** - * batadv_netlink_get_mesh_info - handle incoming BATADV_CMD_GET_MESH_INFO + * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO * netlink request * @skb: received netlink message * @info: receiver information @@ -230,7 +231,7 @@ batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info) } /** - * batadv_netlink_tp_meter_put - Fill information of started tp_meter session + * batadv_netlink_tp_meter_put() - Fill information of started tp_meter session * @msg: netlink message to be sent back * @cookie: tp meter session cookie * @@ -246,7 +247,7 @@ batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie) } /** - * batadv_netlink_tpmeter_notify - send tp_meter result via netlink to client + * batadv_netlink_tpmeter_notify() - send tp_meter result via netlink to client * @bat_priv: the bat priv with all the soft interface information * @dst: destination of tp_meter session * @result: reason for tp meter session stop @@ -309,7 +310,7 @@ err_genlmsg: } /** - * batadv_netlink_tp_meter_start - Start a new tp_meter session + * batadv_netlink_tp_meter_start() - Start a new tp_meter session * @skb: received netlink message * @info: receiver information * @@ -386,7 +387,7 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) } /** - * batadv_netlink_tp_meter_start - Cancel a running tp_meter session + * batadv_netlink_tp_meter_start() - Cancel a running tp_meter session * @skb: received netlink message * @info: receiver information * @@ -431,7 +432,7 @@ out: } /** - * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message + * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -473,7 +474,7 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_netlink_dump_hardifs - Dump all hard interface into a messages + * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages * @msg: Netlink message to dump into * @cb: Parameters from query * @@ -620,7 +621,7 @@ struct genl_family batadv_netlink_family __ro_after_init = { }; /** - * batadv_netlink_register - register batadv genl netlink family + * batadv_netlink_register() - register batadv genl netlink family */ void __init batadv_netlink_register(void) { @@ -632,7 +633,7 @@ void __init batadv_netlink_register(void) } /** - * batadv_netlink_unregister - unregister batadv genl netlink family + * batadv_netlink_unregister() - unregister batadv genl netlink family */ void batadv_netlink_unregister(void) { diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index f1cd8c5da966..0e7e57b69b54 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors: * * Matthias Schiffer diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 3604d7899e2c..b48116bb24ef 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen @@ -25,7 +26,7 @@ #include <linux/debugfs.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/if_packet.h> #include <linux/init.h> @@ -35,6 +36,7 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> +#include <linux/net.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/random.h> @@ -47,12 +49,12 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> +#include <uapi/linux/batadv_packet.h> #include "hard-interface.h" #include "hash.h" #include "log.h" #include "originator.h" -#include "packet.h" #include "routing.h" #include "send.h" #include "tvlv.h" @@ -65,7 +67,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); /** - * batadv_nc_init - one-time initialization for network coding + * batadv_nc_init() - one-time initialization for network coding * * Return: 0 on success or negative error number in case of failure */ @@ -81,7 +83,7 @@ int __init batadv_nc_init(void) } /** - * batadv_nc_start_timer - initialise the nc periodic worker + * batadv_nc_start_timer() - initialise the nc periodic worker * @bat_priv: the bat priv with all the soft interface information */ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) @@ -91,7 +93,7 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_nc_tvlv_container_update - update the network coding tvlv container + * batadv_nc_tvlv_container_update() - update the network coding tvlv container * after network coding setting change * @bat_priv: the bat priv with all the soft interface information */ @@ -113,7 +115,7 @@ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv) } /** - * batadv_nc_status_update - update the network coding tvlv container after + * batadv_nc_status_update() - update the network coding tvlv container after * network coding setting change * @net_dev: the soft interface net device */ @@ -125,7 +127,7 @@ void batadv_nc_status_update(struct net_device *net_dev) } /** - * batadv_nc_tvlv_ogm_handler_v1 - process incoming nc tvlv container + * batadv_nc_tvlv_ogm_handler_v1() - process incoming nc tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) @@ -144,7 +146,7 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, } /** - * batadv_nc_mesh_init - initialise coding hash table and start house keeping + * batadv_nc_mesh_init() - initialise coding hash table and start house keeping * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or negative error number in case of failure @@ -185,7 +187,7 @@ err: } /** - * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables + * batadv_nc_init_bat_priv() - initialise the nc specific bat_priv variables * @bat_priv: the bat priv with all the soft interface information */ void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) @@ -197,7 +199,7 @@ void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) } /** - * batadv_nc_init_orig - initialise the nc fields of an orig_node + * batadv_nc_init_orig() - initialise the nc fields of an orig_node * @orig_node: the orig_node which is going to be initialised */ void batadv_nc_init_orig(struct batadv_orig_node *orig_node) @@ -209,8 +211,8 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node) } /** - * batadv_nc_node_release - release nc_node from lists and queue for free after - * rcu grace period + * batadv_nc_node_release() - release nc_node from lists and queue for free + * after rcu grace period * @ref: kref pointer of the nc_node */ static void batadv_nc_node_release(struct kref *ref) @@ -224,7 +226,7 @@ static void batadv_nc_node_release(struct kref *ref) } /** - * batadv_nc_node_put - decrement the nc_node refcounter and possibly + * batadv_nc_node_put() - decrement the nc_node refcounter and possibly * release it * @nc_node: nc_node to be free'd */ @@ -234,8 +236,8 @@ static void batadv_nc_node_put(struct batadv_nc_node *nc_node) } /** - * batadv_nc_path_release - release nc_path from lists and queue for free after - * rcu grace period + * batadv_nc_path_release() - release nc_path from lists and queue for free + * after rcu grace period * @ref: kref pointer of the nc_path */ static void batadv_nc_path_release(struct kref *ref) @@ -248,7 +250,7 @@ static void batadv_nc_path_release(struct kref *ref) } /** - * batadv_nc_path_put - decrement the nc_path refcounter and possibly + * batadv_nc_path_put() - decrement the nc_path refcounter and possibly * release it * @nc_path: nc_path to be free'd */ @@ -258,7 +260,7 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path) } /** - * batadv_nc_packet_free - frees nc packet + * batadv_nc_packet_free() - frees nc packet * @nc_packet: the nc packet to free * @dropped: whether the packet is freed because is is dropped */ @@ -275,7 +277,7 @@ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet, } /** - * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged + * batadv_nc_to_purge_nc_node() - checks whether an nc node has to be purged * @bat_priv: the bat priv with all the soft interface information * @nc_node: the nc node to check * @@ -291,7 +293,7 @@ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, } /** - * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out + * batadv_nc_to_purge_nc_path_coding() - checks whether an nc path has timed out * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path to check * @@ -311,7 +313,8 @@ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, } /** - * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out + * batadv_nc_to_purge_nc_path_decoding() - checks whether an nc path has timed + * out * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path to check * @@ -331,7 +334,7 @@ static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, } /** - * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale + * batadv_nc_purge_orig_nc_nodes() - go through list of nc nodes and purge stale * entries * @bat_priv: the bat priv with all the soft interface information * @list: list of nc nodes @@ -369,7 +372,7 @@ batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv, } /** - * batadv_nc_purge_orig - purges all nc node data attached of the given + * batadv_nc_purge_orig() - purges all nc node data attached of the given * originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig_node with the nc node entries to be purged @@ -395,8 +398,8 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv, } /** - * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they - * have timed out nc nodes + * batadv_nc_purge_orig_hash() - traverse entire originator hash to check if + * they have timed out nc nodes * @bat_priv: the bat priv with all the soft interface information */ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) @@ -422,7 +425,7 @@ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) } /** - * batadv_nc_purge_paths - traverse all nc paths part of the hash and remove + * batadv_nc_purge_paths() - traverse all nc paths part of the hash and remove * unused ones * @bat_priv: the bat priv with all the soft interface information * @hash: hash table containing the nc paths to check @@ -481,7 +484,7 @@ static void batadv_nc_purge_paths(struct batadv_priv *bat_priv, } /** - * batadv_nc_hash_key_gen - computes the nc_path hash key + * batadv_nc_hash_key_gen() - computes the nc_path hash key * @key: buffer to hold the final hash key * @src: source ethernet mac address going into the hash key * @dst: destination ethernet mac address going into the hash key @@ -494,7 +497,7 @@ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src, } /** - * batadv_nc_hash_choose - compute the hash value for an nc path + * batadv_nc_hash_choose() - compute the hash value for an nc path * @data: data to hash * @size: size of the hash table * @@ -512,7 +515,7 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size) } /** - * batadv_nc_hash_compare - comparing function used in the network coding hash + * batadv_nc_hash_compare() - comparing function used in the network coding hash * tables * @node: node in the local table * @data2: second object to compare the node to @@ -538,7 +541,7 @@ static bool batadv_nc_hash_compare(const struct hlist_node *node, } /** - * batadv_nc_hash_find - search for an existing nc path and return it + * batadv_nc_hash_find() - search for an existing nc path and return it * @hash: hash table containing the nc path * @data: search key * @@ -575,7 +578,7 @@ batadv_nc_hash_find(struct batadv_hashtable *hash, } /** - * batadv_nc_send_packet - send non-coded packet and free nc_packet struct + * batadv_nc_send_packet() - send non-coded packet and free nc_packet struct * @nc_packet: the nc packet to send */ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) @@ -586,7 +589,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) } /** - * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet. + * batadv_nc_sniffed_purge() - Checks timestamp of given sniffed nc_packet. * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path the packet belongs to * @nc_packet: the nc packet to be checked @@ -625,7 +628,7 @@ out: } /** - * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet. + * batadv_nc_fwd_flush() - Checks the timestamp of the given nc packet. * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path the packet belongs to * @nc_packet: the nc packet to be checked @@ -663,8 +666,8 @@ static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, } /** - * batadv_nc_process_nc_paths - traverse given nc packet pool and free timed out - * nc packets + * batadv_nc_process_nc_paths() - traverse given nc packet pool and free timed + * out nc packets * @bat_priv: the bat priv with all the soft interface information * @hash: to be processed hash table * @process_fn: Function called to process given nc packet. Should return true @@ -709,7 +712,8 @@ batadv_nc_process_nc_paths(struct batadv_priv *bat_priv, } /** - * batadv_nc_worker - periodic task for house keeping related to network coding + * batadv_nc_worker() - periodic task for house keeping related to network + * coding * @work: kernel work struct */ static void batadv_nc_worker(struct work_struct *work) @@ -749,8 +753,8 @@ static void batadv_nc_worker(struct work_struct *work) } /** - * batadv_can_nc_with_orig - checks whether the given orig node is suitable for - * coding or not + * batadv_can_nc_with_orig() - checks whether the given orig node is suitable + * for coding or not * @bat_priv: the bat priv with all the soft interface information * @orig_node: neighboring orig node which may be used as nc candidate * @ogm_packet: incoming ogm packet also used for the checks @@ -790,7 +794,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, } /** - * batadv_nc_find_nc_node - search for an existing nc node and return it + * batadv_nc_find_nc_node() - search for an existing nc node and return it * @orig_node: orig node originating the ogm packet * @orig_neigh_node: neighboring orig node from which we received the ogm packet * (can be equal to orig_node) @@ -830,7 +834,7 @@ batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, } /** - * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was + * batadv_nc_get_nc_node() - retrieves an nc node or creates the entry if it was * not found * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node originating the ogm packet @@ -890,7 +894,7 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, } /** - * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node + * batadv_nc_update_nc_node() - updates stored incoming and outgoing nc node * structs (best called on incoming OGMs) * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node originating the ogm packet @@ -945,7 +949,7 @@ out: } /** - * batadv_nc_get_path - get existing nc_path or allocate a new one + * batadv_nc_get_path() - get existing nc_path or allocate a new one * @bat_priv: the bat priv with all the soft interface information * @hash: hash table containing the nc path * @src: ethernet source address - first half of the nc path search key @@ -1006,7 +1010,7 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, } /** - * batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair + * batadv_nc_random_weight_tq() - scale the receivers TQ-value to avoid unfair * selection of a receiver with slightly lower TQ than the other * @tq: to be weighted tq value * @@ -1029,7 +1033,7 @@ static u8 batadv_nc_random_weight_tq(u8 tq) } /** - * batadv_nc_memxor - XOR destination with source + * batadv_nc_memxor() - XOR destination with source * @dst: byte array to XOR into * @src: byte array to XOR from * @len: length of destination array @@ -1043,7 +1047,7 @@ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len) } /** - * batadv_nc_code_packets - code a received unicast_packet with an nc packet + * batadv_nc_code_packets() - code a received unicast_packet with an nc packet * into a coded_packet and send it * @bat_priv: the bat priv with all the soft interface information * @skb: data skb to forward @@ -1236,7 +1240,7 @@ out: } /** - * batadv_nc_skb_coding_possible - true if a decoded skb is available at dst. + * batadv_nc_skb_coding_possible() - true if a decoded skb is available at dst. * @skb: data skb to forward * @dst: destination mac address of the other skb to code with * @src: source mac address of skb @@ -1260,7 +1264,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src) } /** - * batadv_nc_path_search - Find the coding path matching in_nc_node and + * batadv_nc_path_search() - Find the coding path matching in_nc_node and * out_nc_node to retrieve a buffered packet that can be used for coding. * @bat_priv: the bat priv with all the soft interface information * @in_nc_node: pointer to skb next hop's neighbor nc node @@ -1328,8 +1332,8 @@ batadv_nc_path_search(struct batadv_priv *bat_priv, } /** - * batadv_nc_skb_src_search - Loops through the list of neighoring nodes of the - * skb's sender (may be equal to the originator). + * batadv_nc_skb_src_search() - Loops through the list of neighoring nodes of + * the skb's sender (may be equal to the originator). * @bat_priv: the bat priv with all the soft interface information * @skb: data skb to forward * @eth_dst: next hop mac address of skb @@ -1374,7 +1378,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv, } /** - * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the + * batadv_nc_skb_store_before_coding() - set the ethernet src and dst of the * unicast skb before it is stored for use in later decoding * @bat_priv: the bat priv with all the soft interface information * @skb: data skb to store @@ -1409,7 +1413,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, } /** - * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst. + * batadv_nc_skb_dst_search() - Loops through list of neighboring nodes to dst. * @skb: data skb to forward * @neigh_node: next hop to forward packet to * @ethhdr: pointer to the ethernet header inside the skb @@ -1467,7 +1471,7 @@ static bool batadv_nc_skb_dst_search(struct sk_buff *skb, } /** - * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding + * batadv_nc_skb_add_to_path() - buffer skb for later encoding / decoding * @skb: skb to add to path * @nc_path: path to add skb to * @neigh_node: next hop to forward packet to @@ -1502,7 +1506,7 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, } /** - * batadv_nc_skb_forward - try to code a packet or add it to the coding packet + * batadv_nc_skb_forward() - try to code a packet or add it to the coding packet * buffer * @skb: data skb to forward * @neigh_node: next hop to forward packet to @@ -1559,8 +1563,8 @@ out: } /** - * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used - * when decoding coded packets + * batadv_nc_skb_store_for_decoding() - save a clone of the skb which can be + * used when decoding coded packets * @bat_priv: the bat priv with all the soft interface information * @skb: data skb to store */ @@ -1620,7 +1624,7 @@ out: } /** - * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet + * batadv_nc_skb_store_sniffed_unicast() - check if a received unicast packet * should be saved in the decoding buffer and, if so, store it there * @bat_priv: the bat priv with all the soft interface information * @skb: unicast skb to store @@ -1640,7 +1644,7 @@ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, } /** - * batadv_nc_skb_decode_packet - decode given skb using the decode data stored + * batadv_nc_skb_decode_packet() - decode given skb using the decode data stored * in nc_packet * @bat_priv: the bat priv with all the soft interface information * @skb: unicast skb to decode @@ -1734,7 +1738,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, } /** - * batadv_nc_find_decoding_packet - search through buffered decoding data to + * batadv_nc_find_decoding_packet() - search through buffered decoding data to * find the data needed to decode the coded packet * @bat_priv: the bat priv with all the soft interface information * @ethhdr: pointer to the ethernet header inside the coded packet @@ -1799,7 +1803,7 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, } /** - * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the + * batadv_nc_recv_coded_packet() - try to decode coded packet and enqueue the * resulting unicast packet * @skb: incoming coded packet * @recv_if: pointer to interface this packet was received on @@ -1874,7 +1878,7 @@ free_skb: } /** - * batadv_nc_mesh_free - clean up network coding memory + * batadv_nc_mesh_free() - clean up network coding memory * @bat_priv: the bat priv with all the soft interface information */ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) @@ -1891,7 +1895,7 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_nc_nodes_seq_print_text - print the nc node information + * batadv_nc_nodes_seq_print_text() - print the nc node information * @seq: seq file to print on * @offset: not used * @@ -1954,7 +1958,7 @@ out: } /** - * batadv_nc_init_debugfs - create nc folder and related files in debugfs + * batadv_nc_init_debugfs() - create nc folder and related files in debugfs * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or negative error number in case of failure diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index c66efb81d2f4..adaeafa4f71e 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 2967b86c13da..58a7d9274435 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -21,7 +22,7 @@ #include <linux/atomic.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/kref.h> @@ -30,10 +31,12 @@ #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rculist.h> +#include <linux/rcupdate.h> #include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/stddef.h> #include <linux/workqueue.h> #include <net/sock.h> #include <uapi/linux/batman_adv.h> @@ -55,10 +58,47 @@ /* hash class keys */ static struct lock_class_key batadv_orig_hash_lock_class_key; +/** + * batadv_orig_hash_find() - Find and return originator from orig_hash + * @bat_priv: the bat priv with all the soft interface information + * @data: mac address of the originator + * + * Return: orig_node (with increased refcnt), NULL on errors + */ +struct batadv_orig_node * +batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node, *orig_node_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = batadv_choose_orig(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (!batadv_compare_eth(orig_node, data)) + continue; + + if (!kref_get_unless_zero(&orig_node->refcount)) + continue; + + orig_node_tmp = orig_node; + break; + } + rcu_read_unlock(); + + return orig_node_tmp; +} + static void batadv_purge_orig(struct work_struct *work); /** - * batadv_compare_orig - comparing function used in the originator hash table + * batadv_compare_orig() - comparing function used in the originator hash table * @node: node in the local table * @data2: second object to compare the node to * @@ -73,7 +113,7 @@ bool batadv_compare_orig(const struct hlist_node *node, const void *data2) } /** - * batadv_orig_node_vlan_get - get an orig_node_vlan object + * batadv_orig_node_vlan_get() - get an orig_node_vlan object * @orig_node: the originator serving the VLAN * @vid: the VLAN identifier * @@ -104,7 +144,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, } /** - * batadv_orig_node_vlan_new - search and possibly create an orig_node_vlan + * batadv_orig_node_vlan_new() - search and possibly create an orig_node_vlan * object * @orig_node: the originator serving the VLAN * @vid: the VLAN identifier @@ -145,7 +185,7 @@ out: } /** - * batadv_orig_node_vlan_release - release originator-vlan object from lists + * batadv_orig_node_vlan_release() - release originator-vlan object from lists * and queue for free after rcu grace period * @ref: kref pointer of the originator-vlan object */ @@ -159,7 +199,7 @@ static void batadv_orig_node_vlan_release(struct kref *ref) } /** - * batadv_orig_node_vlan_put - decrement the refcounter and possibly release + * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release * the originator-vlan object * @orig_vlan: the originator-vlan object to release */ @@ -168,6 +208,12 @@ void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan) kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release); } +/** + * batadv_originator_init() - Initialize all originator structures + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_originator_init(struct batadv_priv *bat_priv) { if (bat_priv->orig_hash) @@ -193,7 +239,7 @@ err: } /** - * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for + * batadv_neigh_ifinfo_release() - release neigh_ifinfo from lists and queue for * free after rcu grace period * @ref: kref pointer of the neigh_ifinfo */ @@ -210,7 +256,7 @@ static void batadv_neigh_ifinfo_release(struct kref *ref) } /** - * batadv_neigh_ifinfo_put - decrement the refcounter and possibly release + * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release * the neigh_ifinfo * @neigh_ifinfo: the neigh_ifinfo object to release */ @@ -220,7 +266,7 @@ void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo) } /** - * batadv_hardif_neigh_release - release hardif neigh node from lists and + * batadv_hardif_neigh_release() - release hardif neigh node from lists and * queue for free after rcu grace period * @ref: kref pointer of the neigh_node */ @@ -240,7 +286,7 @@ static void batadv_hardif_neigh_release(struct kref *ref) } /** - * batadv_hardif_neigh_put - decrement the hardif neighbors refcounter + * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter * and possibly release it * @hardif_neigh: hardif neigh neighbor to free */ @@ -250,7 +296,7 @@ void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh) } /** - * batadv_neigh_node_release - release neigh_node from lists and queue for + * batadv_neigh_node_release() - release neigh_node from lists and queue for * free after rcu grace period * @ref: kref pointer of the neigh_node */ @@ -275,7 +321,7 @@ static void batadv_neigh_node_release(struct kref *ref) } /** - * batadv_neigh_node_put - decrement the neighbors refcounter and possibly + * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly * release it * @neigh_node: neigh neighbor to free */ @@ -285,7 +331,7 @@ void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node) } /** - * batadv_orig_router_get - router to the originator depending on iface + * batadv_orig_router_get() - router to the originator depending on iface * @orig_node: the orig node for the router * @if_outgoing: the interface where the payload packet has been received or * the OGM should be sent to @@ -318,7 +364,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node, } /** - * batadv_orig_ifinfo_get - find the ifinfo from an orig_node + * batadv_orig_ifinfo_get() - find the ifinfo from an orig_node * @orig_node: the orig node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * @@ -350,7 +396,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, } /** - * batadv_orig_ifinfo_new - search and possibly create an orig_ifinfo object + * batadv_orig_ifinfo_new() - search and possibly create an orig_ifinfo object * @orig_node: the orig node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * @@ -396,7 +442,7 @@ out: } /** - * batadv_neigh_ifinfo_get - find the ifinfo from an neigh_node + * batadv_neigh_ifinfo_get() - find the ifinfo from an neigh_node * @neigh: the neigh node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * @@ -429,7 +475,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, } /** - * batadv_neigh_ifinfo_new - search and possibly create an neigh_ifinfo object + * batadv_neigh_ifinfo_new() - search and possibly create an neigh_ifinfo object * @neigh: the neigh node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired * @@ -472,7 +518,7 @@ out: } /** - * batadv_neigh_node_get - retrieve a neighbour from the list + * batadv_neigh_node_get() - retrieve a neighbour from the list * @orig_node: originator which the neighbour belongs to * @hard_iface: the interface where this neighbour is connected to * @addr: the address of the neighbour @@ -509,7 +555,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, } /** - * batadv_hardif_neigh_create - create a hardif neighbour node + * batadv_hardif_neigh_create() - create a hardif neighbour node * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve * @orig_node: originator object representing the neighbour @@ -555,7 +601,7 @@ out: } /** - * batadv_hardif_neigh_get_or_create - retrieve or create a hardif neighbour + * batadv_hardif_neigh_get_or_create() - retrieve or create a hardif neighbour * node * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve @@ -579,7 +625,7 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, } /** - * batadv_hardif_neigh_get - retrieve a hardif neighbour from the list + * batadv_hardif_neigh_get() - retrieve a hardif neighbour from the list * @hard_iface: the interface where this neighbour is connected to * @neigh_addr: the address of the neighbour * @@ -611,7 +657,7 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, } /** - * batadv_neigh_node_create - create a neigh node object + * batadv_neigh_node_create() - create a neigh node object * @orig_node: originator object representing the neighbour * @hard_iface: the interface where the neighbour is connected to * @neigh_addr: the mac address of the neighbour interface @@ -676,7 +722,7 @@ out: } /** - * batadv_neigh_node_get_or_create - retrieve or create a neigh node object + * batadv_neigh_node_get_or_create() - retrieve or create a neigh node object * @orig_node: originator object representing the neighbour * @hard_iface: the interface where the neighbour is connected to * @neigh_addr: the mac address of the neighbour interface @@ -700,7 +746,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list + * batadv_hardif_neigh_seq_print_text() - print the single hop neighbour list * @seq: neighbour table seq_file struct * @offset: not used * @@ -735,8 +781,8 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) #endif /** - * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific - * outgoing interface + * batadv_hardif_neigh_dump() - Dump to netlink the neighbor infos for a + * specific outgoing interface * @msg: message to dump into * @cb: parameters for the dump * @@ -812,7 +858,7 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) } /** - * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for + * batadv_orig_ifinfo_release() - release orig_ifinfo from lists and queue for * free after rcu grace period * @ref: kref pointer of the orig_ifinfo */ @@ -835,7 +881,7 @@ static void batadv_orig_ifinfo_release(struct kref *ref) } /** - * batadv_orig_ifinfo_put - decrement the refcounter and possibly release + * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release * the orig_ifinfo * @orig_ifinfo: the orig_ifinfo object to release */ @@ -845,7 +891,7 @@ void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo) } /** - * batadv_orig_node_free_rcu - free the orig_node + * batadv_orig_node_free_rcu() - free the orig_node * @rcu: rcu pointer of the orig_node */ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) @@ -866,7 +912,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) } /** - * batadv_orig_node_release - release orig_node from lists and queue for + * batadv_orig_node_release() - release orig_node from lists and queue for * free after rcu grace period * @ref: kref pointer of the orig_node */ @@ -917,7 +963,7 @@ static void batadv_orig_node_release(struct kref *ref) } /** - * batadv_orig_node_put - decrement the orig node refcounter and possibly + * batadv_orig_node_put() - decrement the orig node refcounter and possibly * release it * @orig_node: the orig node to free */ @@ -926,6 +972,10 @@ void batadv_orig_node_put(struct batadv_orig_node *orig_node) kref_put(&orig_node->refcount, batadv_orig_node_release); } +/** + * batadv_originator_free() - Free all originator structures + * @bat_priv: the bat priv with all the soft interface information + */ void batadv_originator_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; @@ -959,7 +1009,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv) } /** - * batadv_orig_node_new - creates a new orig_node + * batadv_orig_node_new() - creates a new orig_node * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the originator * @@ -1038,7 +1088,7 @@ free_orig_node: } /** - * batadv_purge_neigh_ifinfo - purge obsolete ifinfo entries from neighbor + * batadv_purge_neigh_ifinfo() - purge obsolete ifinfo entries from neighbor * @bat_priv: the bat priv with all the soft interface information * @neigh: orig node which is to be checked */ @@ -1079,7 +1129,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, } /** - * batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator + * batadv_purge_orig_ifinfo() - purge obsolete ifinfo entries from originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * @@ -1131,7 +1181,7 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, } /** - * batadv_purge_orig_neighbors - purges neighbors from originator + * batadv_purge_orig_neighbors() - purges neighbors from originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * @@ -1189,7 +1239,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, } /** - * batadv_find_best_neighbor - finds the best neighbor after purging + * batadv_find_best_neighbor() - finds the best neighbor after purging * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * @if_outgoing: the interface for which the metric should be compared @@ -1224,7 +1274,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv, } /** - * batadv_purge_orig_node - purges obsolete information from an orig_node + * batadv_purge_orig_node() - purges obsolete information from an orig_node * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked * @@ -1341,12 +1391,24 @@ static void batadv_purge_orig(struct work_struct *work) msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD)); } +/** + * batadv_purge_orig_ref() - Purge all outdated originators + * @bat_priv: the bat priv with all the soft interface information + */ void batadv_purge_orig_ref(struct batadv_priv *bat_priv) { _batadv_purge_orig(bat_priv); } #ifdef CONFIG_BATMAN_ADV_DEBUGFS + +/** + * batadv_orig_seq_print_text() - Print the originator table in a seq file + * @seq: seq file to print on + * @offset: not used + * + * Return: always 0 + */ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1376,7 +1438,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) } /** - * batadv_orig_hardif_seq_print_text - writes originator infos for a specific + * batadv_orig_hardif_seq_print_text() - writes originator infos for a specific * outgoing interface * @seq: debugfs table seq_file struct * @offset: not used @@ -1423,7 +1485,7 @@ out: #endif /** - * batadv_orig_dump - Dump to netlink the originator infos for a specific + * batadv_orig_dump() - Dump to netlink the originator infos for a specific * outgoing interface * @msg: message to dump into * @cb: parameters for the dump @@ -1499,6 +1561,13 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) return ret; } +/** + * batadv_orig_hash_add_if() - Add interface to originators in orig_hash + * @hard_iface: hard interface to add (already slave of the soft interface) + * @max_if_num: new number of interfaces + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num) { @@ -1534,6 +1603,13 @@ err: return -ENOMEM; } +/** + * batadv_orig_hash_del_if() - Remove interface from originators in orig_hash + * @hard_iface: hard interface to remove (still slave of the soft interface) + * @max_if_num: new number of interfaces + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, int max_if_num) { diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index d94220a6d21a..8e543a3cdc6c 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -23,14 +24,8 @@ #include <linux/compiler.h> #include <linux/if_ether.h> #include <linux/jhash.h> -#include <linux/kref.h> -#include <linux/rculist.h> -#include <linux/rcupdate.h> -#include <linux/stddef.h> #include <linux/types.h> -#include "hash.h" - struct netlink_callback; struct seq_file; struct sk_buff; @@ -89,8 +84,13 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, unsigned short vid); void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan); -/* hashfunction to choose an entry in a hash table of given size - * hash algorithm from http://en.wikipedia.org/wiki/Hash_table +/** + * batadv_choose_orig() - Return the index of the orig entry in the hash table + * @data: mac address of the originator node + * @size: the size of the hash table + * + * Return: the hash index where the object represented by @data should be + * stored at. */ static inline u32 batadv_choose_orig(const void *data, u32 size) { @@ -100,34 +100,7 @@ static inline u32 batadv_choose_orig(const void *data, u32 size) return hash % size; } -static inline struct batadv_orig_node * -batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) -{ - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_head *head; - struct batadv_orig_node *orig_node, *orig_node_tmp = NULL; - int index; - - if (!hash) - return NULL; - - index = batadv_choose_orig(data, hash->size); - head = &hash->table[index]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - if (!batadv_compare_eth(orig_node, data)) - continue; - - if (!kref_get_unless_zero(&orig_node->refcount)) - continue; - - orig_node_tmp = orig_node; - break; - } - rcu_read_unlock(); - - return orig_node_tmp; -} +struct batadv_orig_node * +batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data); #endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */ diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h deleted file mode 100644 index 8e8a5db197cb..000000000000 --- a/net/batman-adv/packet.h +++ /dev/null @@ -1,621 +0,0 @@ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: - * - * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef _NET_BATMAN_ADV_PACKET_H_ -#define _NET_BATMAN_ADV_PACKET_H_ - -#include <asm/byteorder.h> -#include <linux/types.h> - -#define batadv_tp_is_error(n) ((u8)(n) > 127 ? 1 : 0) - -/** - * enum batadv_packettype - types for batman-adv encapsulated packets - * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV - * @BATADV_BCAST: broadcast packets carrying broadcast payload - * @BATADV_CODED: network coded packets - * @BATADV_ELP: echo location packets for B.A.T.M.A.N. V - * @BATADV_OGM2: originator messages for B.A.T.M.A.N. V - * - * @BATADV_UNICAST: unicast packets carrying unicast payload traffic - * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original - * payload packet - * @BATADV_UNICAST_4ADDR: unicast packet including the originator address of - * the sender - * @BATADV_ICMP: unicast packet like IP ICMP used for ping or traceroute - * @BATADV_UNICAST_TVLV: unicast packet carrying TVLV containers - */ -enum batadv_packettype { - /* 0x00 - 0x3f: local packets or special rules for handling */ - BATADV_IV_OGM = 0x00, - BATADV_BCAST = 0x01, - BATADV_CODED = 0x02, - BATADV_ELP = 0x03, - BATADV_OGM2 = 0x04, - /* 0x40 - 0x7f: unicast */ -#define BATADV_UNICAST_MIN 0x40 - BATADV_UNICAST = 0x40, - BATADV_UNICAST_FRAG = 0x41, - BATADV_UNICAST_4ADDR = 0x42, - BATADV_ICMP = 0x43, - BATADV_UNICAST_TVLV = 0x44, -#define BATADV_UNICAST_MAX 0x7f - /* 0x80 - 0xff: reserved */ -}; - -/** - * enum batadv_subtype - packet subtype for unicast4addr - * @BATADV_P_DATA: user payload - * @BATADV_P_DAT_DHT_GET: DHT request message - * @BATADV_P_DAT_DHT_PUT: DHT store message - * @BATADV_P_DAT_CACHE_REPLY: ARP reply generated by DAT - */ -enum batadv_subtype { - BATADV_P_DATA = 0x01, - BATADV_P_DAT_DHT_GET = 0x02, - BATADV_P_DAT_DHT_PUT = 0x03, - BATADV_P_DAT_CACHE_REPLY = 0x04, -}; - -/* this file is included by batctl which needs these defines */ -#define BATADV_COMPAT_VERSION 15 - -/** - * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets - * @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was - * previously received from someone else than the best neighbor. - * @BATADV_PRIMARIES_FIRST_HOP: flag unused. - * @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a - * one hop neighbor on the interface where it was originally received. - */ -enum batadv_iv_flags { - BATADV_NOT_BEST_NEXT_HOP = BIT(0), - BATADV_PRIMARIES_FIRST_HOP = BIT(1), - BATADV_DIRECTLINK = BIT(2), -}; - -/* ICMP message types */ -enum batadv_icmp_packettype { - BATADV_ECHO_REPLY = 0, - BATADV_DESTINATION_UNREACHABLE = 3, - BATADV_ECHO_REQUEST = 8, - BATADV_TTL_EXCEEDED = 11, - BATADV_PARAMETER_PROBLEM = 12, - BATADV_TP = 15, -}; - -/** - * enum batadv_mcast_flags - flags for multicast capabilities and settings - * @BATADV_MCAST_WANT_ALL_UNSNOOPABLES: we want all packets destined for - * 224.0.0.0/24 or ff02::1 - * @BATADV_MCAST_WANT_ALL_IPV4: we want all IPv4 multicast packets - * @BATADV_MCAST_WANT_ALL_IPV6: we want all IPv6 multicast packets - */ -enum batadv_mcast_flags { - BATADV_MCAST_WANT_ALL_UNSNOOPABLES = BIT(0), - BATADV_MCAST_WANT_ALL_IPV4 = BIT(1), - BATADV_MCAST_WANT_ALL_IPV6 = BIT(2), -}; - -/* tt data subtypes */ -#define BATADV_TT_DATA_TYPE_MASK 0x0F - -/** - * enum batadv_tt_data_flags - flags for tt data tvlv - * @BATADV_TT_OGM_DIFF: TT diff propagated through OGM - * @BATADV_TT_REQUEST: TT request message - * @BATADV_TT_RESPONSE: TT response message - * @BATADV_TT_FULL_TABLE: contains full table to replace existing table - */ -enum batadv_tt_data_flags { - BATADV_TT_OGM_DIFF = BIT(0), - BATADV_TT_REQUEST = BIT(1), - BATADV_TT_RESPONSE = BIT(2), - BATADV_TT_FULL_TABLE = BIT(4), -}; - -/** - * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field - * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not - */ -enum batadv_vlan_flags { - BATADV_VLAN_HAS_TAG = BIT(15), -}; - -/* claim frame types for the bridge loop avoidance */ -enum batadv_bla_claimframe { - BATADV_CLAIM_TYPE_CLAIM = 0x00, - BATADV_CLAIM_TYPE_UNCLAIM = 0x01, - BATADV_CLAIM_TYPE_ANNOUNCE = 0x02, - BATADV_CLAIM_TYPE_REQUEST = 0x03, - BATADV_CLAIM_TYPE_LOOPDETECT = 0x04, -}; - -/** - * enum batadv_tvlv_type - tvlv type definitions - * @BATADV_TVLV_GW: gateway tvlv - * @BATADV_TVLV_DAT: distributed arp table tvlv - * @BATADV_TVLV_NC: network coding tvlv - * @BATADV_TVLV_TT: translation table tvlv - * @BATADV_TVLV_ROAM: roaming advertisement tvlv - * @BATADV_TVLV_MCAST: multicast capability tvlv - */ -enum batadv_tvlv_type { - BATADV_TVLV_GW = 0x01, - BATADV_TVLV_DAT = 0x02, - BATADV_TVLV_NC = 0x03, - BATADV_TVLV_TT = 0x04, - BATADV_TVLV_ROAM = 0x05, - BATADV_TVLV_MCAST = 0x06, -}; - -#pragma pack(2) -/* the destination hardware field in the ARP frame is used to - * transport the claim type and the group id - */ -struct batadv_bla_claim_dst { - u8 magic[3]; /* FF:43:05 */ - u8 type; /* bla_claimframe */ - __be16 group; /* group id */ -}; - -#pragma pack() - -/** - * struct batadv_ogm_packet - ogm (routing protocol) packet - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @flags: contains routing relevant flags - see enum batadv_iv_flags - * @seqno: sequence identification - * @orig: address of the source node - * @prev_sender: address of the previous sender - * @reserved: reserved byte for alignment - * @tq: transmission quality - * @tvlv_len: length of tvlv data following the ogm header - */ -struct batadv_ogm_packet { - u8 packet_type; - u8 version; - u8 ttl; - u8 flags; - __be32 seqno; - u8 orig[ETH_ALEN]; - u8 prev_sender[ETH_ALEN]; - u8 reserved; - u8 tq; - __be16 tvlv_len; - /* __packed is not needed as the struct size is divisible by 4, - * and the largest data type in this struct has a size of 4. - */ -}; - -#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) - -/** - * struct batadv_ogm2_packet - ogm2 (routing protocol) packet - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the general header - * @ttl: time to live for this packet, part of the general header - * @flags: reseved for routing relevant flags - currently always 0 - * @seqno: sequence number - * @orig: originator mac address - * @tvlv_len: length of the appended tvlv buffer (in bytes) - * @throughput: the currently flooded path throughput - */ -struct batadv_ogm2_packet { - u8 packet_type; - u8 version; - u8 ttl; - u8 flags; - __be32 seqno; - u8 orig[ETH_ALEN]; - __be16 tvlv_len; - __be32 throughput; - /* __packed is not needed as the struct size is divisible by 4, - * and the largest data type in this struct has a size of 4. - */ -}; - -#define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet) - -/** - * struct batadv_elp_packet - elp (neighbor discovery) packet - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @orig: originator mac address - * @seqno: sequence number - * @elp_interval: currently used ELP sending interval in ms - */ -struct batadv_elp_packet { - u8 packet_type; - u8 version; - u8 orig[ETH_ALEN]; - __be32 seqno; - __be32 elp_interval; -}; - -#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet) - -/** - * struct batadv_icmp_header - common members among all the ICMP packets - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @msg_type: ICMP packet type - * @dst: address of the destination node - * @orig: address of the source node - * @uid: local ICMP socket identifier - * @align: not used - useful for alignment purposes only - * - * This structure is used for ICMP packets parsing only and it is never sent - * over the wire. The alignment field at the end is there to ensure that - * members are padded the same way as they are in real packets. - */ -struct batadv_icmp_header { - u8 packet_type; - u8 version; - u8 ttl; - u8 msg_type; /* see ICMP message types above */ - u8 dst[ETH_ALEN]; - u8 orig[ETH_ALEN]; - u8 uid; - u8 align[3]; -}; - -/** - * struct batadv_icmp_packet - ICMP packet - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @msg_type: ICMP packet type - * @dst: address of the destination node - * @orig: address of the source node - * @uid: local ICMP socket identifier - * @reserved: not used - useful for alignment - * @seqno: ICMP sequence number - */ -struct batadv_icmp_packet { - u8 packet_type; - u8 version; - u8 ttl; - u8 msg_type; /* see ICMP message types above */ - u8 dst[ETH_ALEN]; - u8 orig[ETH_ALEN]; - u8 uid; - u8 reserved; - __be16 seqno; -}; - -/** - * struct batadv_icmp_tp_packet - ICMP TP Meter packet - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @msg_type: ICMP packet type - * @dst: address of the destination node - * @orig: address of the source node - * @uid: local ICMP socket identifier - * @subtype: TP packet subtype (see batadv_icmp_tp_subtype) - * @session: TP session identifier - * @seqno: the TP sequence number - * @timestamp: time when the packet has been sent. This value is filled in a - * TP_MSG and echoed back in the next TP_ACK so that the sender can compute the - * RTT. Since it is read only by the host which wrote it, there is no need to - * store it using network order - */ -struct batadv_icmp_tp_packet { - u8 packet_type; - u8 version; - u8 ttl; - u8 msg_type; /* see ICMP message types above */ - u8 dst[ETH_ALEN]; - u8 orig[ETH_ALEN]; - u8 uid; - u8 subtype; - u8 session[2]; - __be32 seqno; - __be32 timestamp; -}; - -/** - * enum batadv_icmp_tp_subtype - ICMP TP Meter packet subtypes - * @BATADV_TP_MSG: Msg from sender to receiver - * @BATADV_TP_ACK: acknowledgment from receiver to sender - */ -enum batadv_icmp_tp_subtype { - BATADV_TP_MSG = 0, - BATADV_TP_ACK, -}; - -#define BATADV_RR_LEN 16 - -/** - * struct batadv_icmp_packet_rr - ICMP RouteRecord packet - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @msg_type: ICMP packet type - * @dst: address of the destination node - * @orig: address of the source node - * @uid: local ICMP socket identifier - * @rr_cur: number of entries the rr array - * @seqno: ICMP sequence number - * @rr: route record array - */ -struct batadv_icmp_packet_rr { - u8 packet_type; - u8 version; - u8 ttl; - u8 msg_type; /* see ICMP message types above */ - u8 dst[ETH_ALEN]; - u8 orig[ETH_ALEN]; - u8 uid; - u8 rr_cur; - __be16 seqno; - u8 rr[BATADV_RR_LEN][ETH_ALEN]; -}; - -#define BATADV_ICMP_MAX_PACKET_SIZE sizeof(struct batadv_icmp_packet_rr) - -/* All packet headers in front of an ethernet header have to be completely - * divisible by 2 but not by 4 to make the payload after the ethernet - * header again 4 bytes boundary aligned. - * - * A packing of 2 is necessary to avoid extra padding at the end of the struct - * caused by a structure member which is larger than two bytes. Otherwise - * the structure would not fulfill the previously mentioned rule to avoid the - * misalignment of the payload after the ethernet header. It may also lead to - * leakage of information when the padding it not initialized before sending. - */ -#pragma pack(2) - -/** - * struct batadv_unicast_packet - unicast packet for network payload - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @ttvn: translation table version number - * @dest: originator destination of the unicast packet - */ -struct batadv_unicast_packet { - u8 packet_type; - u8 version; - u8 ttl; - u8 ttvn; /* destination translation table version number */ - u8 dest[ETH_ALEN]; - /* "4 bytes boundary + 2 bytes" long to make the payload after the - * following ethernet header again 4 bytes boundary aligned - */ -}; - -/** - * struct batadv_unicast_4addr_packet - extended unicast packet - * @u: common unicast packet header - * @src: address of the source - * @subtype: packet subtype - * @reserved: reserved byte for alignment - */ -struct batadv_unicast_4addr_packet { - struct batadv_unicast_packet u; - u8 src[ETH_ALEN]; - u8 subtype; - u8 reserved; - /* "4 bytes boundary + 2 bytes" long to make the payload after the - * following ethernet header again 4 bytes boundary aligned - */ -}; - -/** - * struct batadv_frag_packet - fragmented packet - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @dest: final destination used when routing fragments - * @orig: originator of the fragment used when merging the packet - * @no: fragment number within this sequence - * @priority: priority of frame, from ToS IP precedence or 802.1p - * @reserved: reserved byte for alignment - * @seqno: sequence identification - * @total_size: size of the merged packet - */ -struct batadv_frag_packet { - u8 packet_type; - u8 version; /* batman version field */ - u8 ttl; -#if defined(__BIG_ENDIAN_BITFIELD) - u8 no:4; - u8 priority:3; - u8 reserved:1; -#elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 reserved:1; - u8 priority:3; - u8 no:4; -#else -#error "unknown bitfield endianness" -#endif - u8 dest[ETH_ALEN]; - u8 orig[ETH_ALEN]; - __be16 seqno; - __be16 total_size; -}; - -/** - * struct batadv_bcast_packet - broadcast packet for network payload - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @reserved: reserved byte for alignment - * @seqno: sequence identification - * @orig: originator of the broadcast packet - */ -struct batadv_bcast_packet { - u8 packet_type; - u8 version; /* batman version field */ - u8 ttl; - u8 reserved; - __be32 seqno; - u8 orig[ETH_ALEN]; - /* "4 bytes boundary + 2 bytes" long to make the payload after the - * following ethernet header again 4 bytes boundary aligned - */ -}; - -/** - * struct batadv_coded_packet - network coded packet - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @first_source: original source of first included packet - * @first_orig_dest: original destinal of first included packet - * @first_crc: checksum of first included packet - * @first_ttvn: tt-version number of first included packet - * @second_ttl: ttl of second packet - * @second_dest: second receiver of this coded packet - * @second_source: original source of second included packet - * @second_orig_dest: original destination of second included packet - * @second_crc: checksum of second included packet - * @second_ttvn: tt version number of second included packet - * @coded_len: length of network coded part of the payload - */ -struct batadv_coded_packet { - u8 packet_type; - u8 version; /* batman version field */ - u8 ttl; - u8 first_ttvn; - /* u8 first_dest[ETH_ALEN]; - saved in mac header destination */ - u8 first_source[ETH_ALEN]; - u8 first_orig_dest[ETH_ALEN]; - __be32 first_crc; - u8 second_ttl; - u8 second_ttvn; - u8 second_dest[ETH_ALEN]; - u8 second_source[ETH_ALEN]; - u8 second_orig_dest[ETH_ALEN]; - __be32 second_crc; - __be16 coded_len; -}; - -#pragma pack() - -/** - * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload - * @packet_type: batman-adv packet type, part of the general header - * @version: batman-adv protocol version, part of the genereal header - * @ttl: time to live for this packet, part of the genereal header - * @reserved: reserved field (for packet alignment) - * @src: address of the source - * @dst: address of the destination - * @tvlv_len: length of tvlv data following the unicast tvlv header - * @align: 2 bytes to align the header to a 4 byte boundary - */ -struct batadv_unicast_tvlv_packet { - u8 packet_type; - u8 version; /* batman version field */ - u8 ttl; - u8 reserved; - u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; - __be16 tvlv_len; - u16 align; -}; - -/** - * struct batadv_tvlv_hdr - base tvlv header struct - * @type: tvlv container type (see batadv_tvlv_type) - * @version: tvlv container version - * @len: tvlv container length - */ -struct batadv_tvlv_hdr { - u8 type; - u8 version; - __be16 len; -}; - -/** - * struct batadv_tvlv_gateway_data - gateway data propagated through gw tvlv - * container - * @bandwidth_down: advertised uplink download bandwidth - * @bandwidth_up: advertised uplink upload bandwidth - */ -struct batadv_tvlv_gateway_data { - __be32 bandwidth_down; - __be32 bandwidth_up; -}; - -/** - * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container - * @flags: translation table flags (see batadv_tt_data_flags) - * @ttvn: translation table version number - * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by - * one batadv_tvlv_tt_vlan_data object per announced vlan - */ -struct batadv_tvlv_tt_data { - u8 flags; - u8 ttvn; - __be16 num_vlan; -}; - -/** - * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through - * the tt tvlv container - * @crc: crc32 checksum of the entries belonging to this vlan - * @vid: vlan identifier - * @reserved: unused, useful for alignment purposes - */ -struct batadv_tvlv_tt_vlan_data { - __be32 crc; - __be16 vid; - u16 reserved; -}; - -/** - * struct batadv_tvlv_tt_change - translation table diff data - * @flags: status indicators concerning the non-mesh client (see - * batadv_tt_client_flags) - * @reserved: reserved field - useful for alignment purposes only - * @addr: mac address of non-mesh client that triggered this tt change - * @vid: VLAN identifier - */ -struct batadv_tvlv_tt_change { - u8 flags; - u8 reserved[3]; - u8 addr[ETH_ALEN]; - __be16 vid; -}; - -/** - * struct batadv_tvlv_roam_adv - roaming advertisement - * @client: mac address of roaming client - * @vid: VLAN identifier - */ -struct batadv_tvlv_roam_adv { - u8 client[ETH_ALEN]; - __be16 vid; -}; - -/** - * struct batadv_tvlv_mcast_data - payload of a multicast tvlv - * @flags: multicast flags announced by the orig node - * @reserved: reserved field - */ -struct batadv_tvlv_mcast_data { - u8 flags; - u8 reserved[3]; -}; - -#endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 40d9bf3e5bfe..b6891e8b741c 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -33,6 +34,7 @@ #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/stddef.h> +#include <uapi/linux/batadv_packet.h> #include "bitarray.h" #include "bridge_loop_avoidance.h" @@ -43,7 +45,6 @@ #include "log.h" #include "network-coding.h" #include "originator.h" -#include "packet.h" #include "send.h" #include "soft-interface.h" #include "tp_meter.h" @@ -54,7 +55,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); /** - * _batadv_update_route - set the router for this originator + * _batadv_update_route() - set the router for this originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be configured * @recv_if: the receive interface for which this route is set @@ -118,7 +119,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, } /** - * batadv_update_route - set the router for this originator + * batadv_update_route() - set the router for this originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be configured * @recv_if: the receive interface for which this route is set @@ -145,7 +146,7 @@ out: } /** - * batadv_window_protected - checks whether the host restarted and is in the + * batadv_window_protected() - checks whether the host restarted and is in the * protection time. * @bat_priv: the bat priv with all the soft interface information * @seq_num_diff: difference between the current/received sequence number and @@ -180,6 +181,14 @@ bool batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, return false; } +/** + * batadv_check_management_packet() - Check preconditions for management packets + * @skb: incoming packet buffer + * @hard_iface: incoming hard interface + * @header_len: minimal header length of packet type + * + * Return: true when management preconditions are met, false otherwise + */ bool batadv_check_management_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, int header_len) @@ -212,7 +221,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, } /** - * batadv_recv_my_icmp_packet - receive an icmp packet locally + * batadv_recv_my_icmp_packet() - receive an icmp packet locally * @bat_priv: the bat priv with all the soft interface information * @skb: icmp packet to process * @@ -347,6 +356,13 @@ out: return ret; } +/** + * batadv_recv_icmp_packet() - Process incoming icmp packet + * @skb: incoming packet buffer + * @recv_if: incoming hard interface + * + * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure + */ int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { @@ -440,7 +456,7 @@ free_skb: } /** - * batadv_check_unicast_packet - Check for malformed unicast packets + * batadv_check_unicast_packet() - Check for malformed unicast packets * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check * @hdr_size: size of header to pull @@ -478,7 +494,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, } /** - * batadv_last_bonding_get - Get last_bonding_candidate of orig_node + * batadv_last_bonding_get() - Get last_bonding_candidate of orig_node * @orig_node: originator node whose last bonding candidate should be retrieved * * Return: last bonding candidate of router or NULL if not found @@ -501,7 +517,7 @@ batadv_last_bonding_get(struct batadv_orig_node *orig_node) } /** - * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node + * batadv_last_bonding_replace() - Replace last_bonding_candidate of orig_node * @orig_node: originator node whose bonding candidates should be replaced * @new_candidate: new bonding candidate or NULL */ @@ -524,7 +540,7 @@ batadv_last_bonding_replace(struct batadv_orig_node *orig_node, } /** - * batadv_find_router - find a suitable router for this originator + * batadv_find_router() - find a suitable router for this originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: the destination node * @recv_if: pointer to interface this packet was received on @@ -741,7 +757,7 @@ free_skb: } /** - * batadv_reroute_unicast_packet - update the unicast header for re-routing + * batadv_reroute_unicast_packet() - update the unicast header for re-routing * @bat_priv: the bat priv with all the soft interface information * @unicast_packet: the unicast header to be updated * @dst_addr: the payload destination @@ -904,7 +920,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, } /** - * batadv_recv_unhandled_unicast_packet - receive and process packets which + * batadv_recv_unhandled_unicast_packet() - receive and process packets which * are in the unicast number space but not yet known to the implementation * @skb: unicast tvlv packet to process * @recv_if: pointer to interface this packet was received on @@ -935,6 +951,13 @@ free_skb: return NET_RX_DROP; } +/** + * batadv_recv_unicast_packet() - Process incoming unicast packet + * @skb: incoming packet buffer + * @recv_if: incoming hard interface + * + * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure + */ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { @@ -1036,7 +1059,7 @@ free_skb: } /** - * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets + * batadv_recv_unicast_tvlv() - receive and process unicast tvlv packets * @skb: unicast tvlv packet to process * @recv_if: pointer to interface this packet was received on * @@ -1090,7 +1113,7 @@ free_skb: } /** - * batadv_recv_frag_packet - process received fragment + * batadv_recv_frag_packet() - process received fragment * @skb: the received fragment * @recv_if: interface that the skb is received on * @@ -1155,6 +1178,13 @@ free_skb: return ret; } +/** + * batadv_recv_bcast_packet() - Process incoming broadcast packet + * @skb: incoming packet buffer + * @recv_if: incoming hard interface + * + * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure + */ int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 5ede16c32f15..a1289bc5f115 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 7895323fd2a7..2a5ab6f1076d 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -23,7 +24,7 @@ #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if.h> #include <linux/if_ether.h> #include <linux/jiffies.h> @@ -54,7 +55,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work); /** - * batadv_send_skb_packet - send an already prepared packet + * batadv_send_skb_packet() - send an already prepared packet * @skb: the packet to send * @hard_iface: the interface to use to send the broadcast packet * @dst_addr: the payload destination @@ -123,12 +124,30 @@ send_skb_err: return NET_XMIT_DROP; } +/** + * batadv_send_broadcast_skb() - Send broadcast packet via hard interface + * @skb: packet to be transmitted (with batadv header and no outer eth header) + * @hard_iface: outgoing interface + * + * Return: A negative errno code is returned on a failure. A success does not + * guarantee the frame will be transmitted as it may be dropped due + * to congestion or traffic shaping. + */ int batadv_send_broadcast_skb(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { return batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr); } +/** + * batadv_send_unicast_skb() - Send unicast packet to neighbor + * @skb: packet to be transmitted (with batadv header and no outer eth header) + * @neigh: neighbor which is used as next hop to destination + * + * Return: A negative errno code is returned on a failure. A success does not + * guarantee the frame will be transmitted as it may be dropped due + * to congestion or traffic shaping. + */ int batadv_send_unicast_skb(struct sk_buff *skb, struct batadv_neigh_node *neigh) { @@ -153,7 +172,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb, } /** - * batadv_send_skb_to_orig - Lookup next-hop and transmit skb. + * batadv_send_skb_to_orig() - Lookup next-hop and transmit skb. * @skb: Packet to be transmitted. * @orig_node: Final destination of the packet. * @recv_if: Interface used when receiving the packet (can be NULL). @@ -216,7 +235,7 @@ free_skb: } /** - * batadv_send_skb_push_fill_unicast - extend the buffer and initialize the + * batadv_send_skb_push_fill_unicast() - extend the buffer and initialize the * common fields for unicast packets * @skb: the skb carrying the unicast header to initialize * @hdr_size: amount of bytes to push at the beginning of the skb @@ -249,7 +268,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, } /** - * batadv_send_skb_prepare_unicast - encapsulate an skb with a unicast header + * batadv_send_skb_prepare_unicast() - encapsulate an skb with a unicast header * @skb: the skb containing the payload to encapsulate * @orig_node: the destination node * @@ -264,7 +283,7 @@ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, } /** - * batadv_send_skb_prepare_unicast_4addr - encapsulate an skb with a + * batadv_send_skb_prepare_unicast_4addr() - encapsulate an skb with a * unicast 4addr header * @bat_priv: the bat priv with all the soft interface information * @skb: the skb containing the payload to encapsulate @@ -308,7 +327,7 @@ out: } /** - * batadv_send_skb_unicast - encapsulate and send an skb via unicast + * batadv_send_skb_unicast() - encapsulate and send an skb via unicast * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use @@ -378,7 +397,7 @@ out: } /** - * batadv_send_skb_via_tt_generic - send an skb via TT lookup + * batadv_send_skb_via_tt_generic() - send an skb via TT lookup * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use @@ -425,7 +444,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, } /** - * batadv_send_skb_via_gw - send an skb via gateway lookup + * batadv_send_skb_via_gw() - send an skb via gateway lookup * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @vid: the vid to be used to search the translation table @@ -452,7 +471,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, } /** - * batadv_forw_packet_free - free a forwarding packet + * batadv_forw_packet_free() - free a forwarding packet * @forw_packet: The packet to free * @dropped: whether the packet is freed because is is dropped * @@ -477,7 +496,7 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, } /** - * batadv_forw_packet_alloc - allocate a forwarding packet + * batadv_forw_packet_alloc() - allocate a forwarding packet * @if_incoming: The (optional) if_incoming to be grabbed * @if_outgoing: The (optional) if_outgoing to be grabbed * @queue_left: The (optional) queue counter to decrease @@ -543,7 +562,7 @@ err: } /** - * batadv_forw_packet_was_stolen - check whether someone stole this packet + * batadv_forw_packet_was_stolen() - check whether someone stole this packet * @forw_packet: the forwarding packet to check * * This function checks whether the given forwarding packet was claimed by @@ -558,7 +577,7 @@ batadv_forw_packet_was_stolen(struct batadv_forw_packet *forw_packet) } /** - * batadv_forw_packet_steal - claim a forw_packet for free() + * batadv_forw_packet_steal() - claim a forw_packet for free() * @forw_packet: the forwarding packet to steal * @lock: a key to the store to steal from (e.g. forw_{bat,bcast}_list_lock) * @@ -589,7 +608,7 @@ bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet, } /** - * batadv_forw_packet_list_steal - claim a list of forward packets for free() + * batadv_forw_packet_list_steal() - claim a list of forward packets for free() * @forw_list: the to be stolen forward packets * @cleanup_list: a backup pointer, to be able to dispose the packet later * @hard_iface: the interface to steal forward packets from @@ -625,7 +644,7 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list, } /** - * batadv_forw_packet_list_free - free a list of forward packets + * batadv_forw_packet_list_free() - free a list of forward packets * @head: a list of to be freed forw_packets * * This function cancels the scheduling of any packet in the provided list, @@ -649,7 +668,7 @@ static void batadv_forw_packet_list_free(struct hlist_head *head) } /** - * batadv_forw_packet_queue - try to queue a forwarding packet + * batadv_forw_packet_queue() - try to queue a forwarding packet * @forw_packet: the forwarding packet to queue * @lock: a key to the store (e.g. forw_{bat,bcast}_list_lock) * @head: the shelve to queue it on (e.g. forw_{bat,bcast}_list) @@ -693,7 +712,7 @@ static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet, } /** - * batadv_forw_packet_bcast_queue - try to queue a broadcast packet + * batadv_forw_packet_bcast_queue() - try to queue a broadcast packet * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent @@ -712,7 +731,7 @@ batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv, } /** - * batadv_forw_packet_ogmv1_queue - try to queue an OGMv1 packet + * batadv_forw_packet_ogmv1_queue() - try to queue an OGMv1 packet * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent @@ -730,7 +749,7 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv, } /** - * batadv_add_bcast_packet_to_list - queue broadcast packet for multiple sends + * batadv_add_bcast_packet_to_list() - queue broadcast packet for multiple sends * @bat_priv: the bat priv with all the soft interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending @@ -790,7 +809,7 @@ err: } /** - * batadv_forw_packet_bcasts_left - check if a retransmission is necessary + * batadv_forw_packet_bcasts_left() - check if a retransmission is necessary * @forw_packet: the forwarding packet to check * @hard_iface: the interface to check on * @@ -818,7 +837,8 @@ batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet, } /** - * batadv_forw_packet_bcasts_inc - increment retransmission counter of a packet + * batadv_forw_packet_bcasts_inc() - increment retransmission counter of a + * packet * @forw_packet: the packet to increase the counter for */ static void @@ -828,7 +848,7 @@ batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet) } /** - * batadv_forw_packet_is_rebroadcast - check packet for previous transmissions + * batadv_forw_packet_is_rebroadcast() - check packet for previous transmissions * @forw_packet: the packet to check * * Return: True if this packet was transmitted before, false otherwise. @@ -953,7 +973,7 @@ out: } /** - * batadv_purge_outstanding_packets - stop/purge scheduled bcast/OGMv1 packets + * batadv_purge_outstanding_packets() - stop/purge scheduled bcast/OGMv1 packets * @bat_priv: the bat priv with all the soft interface information * @hard_iface: the hard interface to cancel and purge bcast/ogm packets on * diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index a16b34f473ef..1e8c79093623 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -23,8 +24,7 @@ #include <linux/compiler.h> #include <linux/spinlock.h> #include <linux/types.h> - -#include "packet.h" +#include <uapi/linux/batadv_packet.h> struct sk_buff; @@ -76,7 +76,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); /** - * batadv_send_skb_via_tt - send an skb via TT lookup + * batadv_send_skb_via_tt() - send an skb via TT lookup * @bat_priv: the bat priv with all the soft interface information * @skb: the payload to send * @dst_hint: can be used to override the destination contained in the skb @@ -97,7 +97,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, } /** - * batadv_send_skb_via_tt_4addr - send an skb via TT lookup + * batadv_send_skb_via_tt_4addr() - send an skb via TT lookup * @bat_priv: the bat priv with all the soft interface information * @skb: the payload to send * @packet_subtype: the unicast 4addr packet subtype to use diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 9f673cdfecf8..900c5ce21cd4 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -26,7 +27,7 @@ #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/jiffies.h> @@ -48,6 +49,7 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> +#include <uapi/linux/batadv_packet.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -59,11 +61,17 @@ #include "multicast.h" #include "network-coding.h" #include "originator.h" -#include "packet.h" #include "send.h" #include "sysfs.h" #include "translation-table.h" +/** + * batadv_skb_head_push() - Increase header size and move (push) head pointer + * @skb: packet buffer which should be modified + * @len: number of bytes to add + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) { int result; @@ -96,7 +104,7 @@ static int batadv_interface_release(struct net_device *dev) } /** - * batadv_sum_counter - Sum the cpu-local counters for index 'idx' + * batadv_sum_counter() - Sum the cpu-local counters for index 'idx' * @bat_priv: the bat priv with all the soft interface information * @idx: index of counter to sum up * @@ -169,7 +177,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) } /** - * batadv_interface_set_rx_mode - set the rx mode of a device + * batadv_interface_set_rx_mode() - set the rx mode of a device * @dev: registered network device to modify * * We do not actually need to set any rx filters for the virtual batman @@ -389,7 +397,7 @@ end: } /** - * batadv_interface_rx - receive ethernet frame on local batman-adv interface + * batadv_interface_rx() - receive ethernet frame on local batman-adv interface * @soft_iface: local interface which will receive the ethernet frame * @skb: ethernet frame for @soft_iface * @hdr_size: size of already parsed batman-adv header @@ -501,8 +509,8 @@ out: } /** - * batadv_softif_vlan_release - release vlan from lists and queue for free after - * rcu grace period + * batadv_softif_vlan_release() - release vlan from lists and queue for free + * after rcu grace period * @ref: kref pointer of the vlan object */ static void batadv_softif_vlan_release(struct kref *ref) @@ -519,7 +527,7 @@ static void batadv_softif_vlan_release(struct kref *ref) } /** - * batadv_softif_vlan_put - decrease the vlan object refcounter and + * batadv_softif_vlan_put() - decrease the vlan object refcounter and * possibly release it * @vlan: the vlan object to release */ @@ -532,7 +540,7 @@ void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan) } /** - * batadv_softif_vlan_get - get the vlan object for a specific vid + * batadv_softif_vlan_get() - get the vlan object for a specific vid * @bat_priv: the bat priv with all the soft interface information * @vid: the identifier of the vlan object to retrieve * @@ -561,7 +569,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, } /** - * batadv_softif_create_vlan - allocate the needed resources for a new vlan + * batadv_softif_create_vlan() - allocate the needed resources for a new vlan * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier * @@ -613,7 +621,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) } /** - * batadv_softif_destroy_vlan - remove and destroy a softif_vlan object + * batadv_softif_destroy_vlan() - remove and destroy a softif_vlan object * @bat_priv: the bat priv with all the soft interface information * @vlan: the object to remove */ @@ -631,7 +639,7 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, } /** - * batadv_interface_add_vid - ndo_add_vid API implementation + * batadv_interface_add_vid() - ndo_add_vid API implementation * @dev: the netdev of the mesh interface * @proto: protocol of the the vlan id * @vid: identifier of the new vlan @@ -689,7 +697,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, } /** - * batadv_interface_kill_vid - ndo_kill_vid API implementation + * batadv_interface_kill_vid() - ndo_kill_vid API implementation * @dev: the netdev of the mesh interface * @proto: protocol of the the vlan id * @vid: identifier of the deleted vlan @@ -732,7 +740,7 @@ static struct lock_class_key batadv_netdev_xmit_lock_key; static struct lock_class_key batadv_netdev_addr_lock_key; /** - * batadv_set_lockdep_class_one - Set lockdep class for a single tx queue + * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue * @dev: device which owns the tx queue * @txq: tx queue to modify * @_unused: always NULL @@ -745,7 +753,7 @@ static void batadv_set_lockdep_class_one(struct net_device *dev, } /** - * batadv_set_lockdep_class - Set txq and addr_list lockdep class + * batadv_set_lockdep_class() - Set txq and addr_list lockdep class * @dev: network device to modify */ static void batadv_set_lockdep_class(struct net_device *dev) @@ -755,7 +763,7 @@ static void batadv_set_lockdep_class(struct net_device *dev) } /** - * batadv_softif_init_late - late stage initialization of soft interface + * batadv_softif_init_late() - late stage initialization of soft interface * @dev: registered network device to modify * * Return: error code on failures @@ -860,7 +868,7 @@ free_bat_counters: } /** - * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface + * batadv_softif_slave_add() - Add a slave interface to a batadv_soft_interface * @dev: batadv_soft_interface used as master interface * @slave_dev: net_device which should become the slave interface * @extack: extended ACK report struct @@ -888,7 +896,7 @@ out: } /** - * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface + * batadv_softif_slave_del() - Delete a slave iface from a batadv_soft_interface * @dev: batadv_soft_interface used as master interface * @slave_dev: net_device which should be removed from the master interface * @@ -1023,7 +1031,7 @@ static const struct ethtool_ops batadv_ethtool_ops = { }; /** - * batadv_softif_free - Deconstructor of batadv_soft_interface + * batadv_softif_free() - Deconstructor of batadv_soft_interface * @dev: Device to cleanup and remove */ static void batadv_softif_free(struct net_device *dev) @@ -1039,7 +1047,7 @@ static void batadv_softif_free(struct net_device *dev) } /** - * batadv_softif_init_early - early stage initialization of soft interface + * batadv_softif_init_early() - early stage initialization of soft interface * @dev: registered network device to modify */ static void batadv_softif_init_early(struct net_device *dev) @@ -1063,6 +1071,13 @@ static void batadv_softif_init_early(struct net_device *dev) dev->ethtool_ops = &batadv_ethtool_ops; } +/** + * batadv_softif_create() - Create and register soft interface + * @net: the applicable net namespace + * @name: name of the new soft interface + * + * Return: newly allocated soft_interface, NULL on errors + */ struct net_device *batadv_softif_create(struct net *net, const char *name) { struct net_device *soft_iface; @@ -1089,7 +1104,7 @@ struct net_device *batadv_softif_create(struct net *net, const char *name) } /** - * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs + * batadv_softif_destroy_sysfs() - deletion of batadv_soft_interface via sysfs * @soft_iface: the to-be-removed batman-adv interface */ void batadv_softif_destroy_sysfs(struct net_device *soft_iface) @@ -1111,7 +1126,8 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface) } /** - * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink + * batadv_softif_destroy_netlink() - deletion of batadv_soft_interface via + * netlink * @soft_iface: the to-be-removed batman-adv interface * @head: list pointer */ @@ -1139,6 +1155,12 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface, unregister_netdevice_queue(soft_iface, head); } +/** + * batadv_softif_is_valid() - Check whether device is a batadv soft interface + * @net_dev: device which should be checked + * + * Return: true when net_dev is a batman-adv interface, false otherwise + */ bool batadv_softif_is_valid(const struct net_device *net_dev) { if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 639c3abb214a..075c5b5b2ce1 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index aa187fd42475..c1578fa0b952 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@ -22,10 +23,11 @@ #include <linux/compiler.h> #include <linux/device.h> #include <linux/errno.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if.h> #include <linux/if_vlan.h> #include <linux/kernel.h> +#include <linux/kobject.h> #include <linux/kref.h> #include <linux/netdevice.h> #include <linux/printk.h> @@ -37,6 +39,7 @@ #include <linux/string.h> #include <linux/stringify.h> #include <linux/workqueue.h> +#include <uapi/linux/batadv_packet.h> #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -45,7 +48,6 @@ #include "hard-interface.h" #include "log.h" #include "network-coding.h" -#include "packet.h" #include "soft-interface.h" static struct net_device *batadv_kobj_to_netdev(struct kobject *obj) @@ -63,7 +65,7 @@ static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj) } /** - * batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv + * batadv_vlan_kobj_to_batpriv() - convert a vlan kobj in the associated batpriv * @obj: kobject to covert * * Return: the associated batadv_priv struct. @@ -83,7 +85,7 @@ static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj) } /** - * batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct + * batadv_kobj_to_vlan() - convert a kobj in the associated softif_vlan struct * @bat_priv: the bat priv with all the soft interface information * @obj: kobject to covert * @@ -598,7 +600,7 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, } /** - * batadv_show_isolation_mark - print the current isolation mark/mask + * batadv_show_isolation_mark() - print the current isolation mark/mask * @kobj: kobject representing the private mesh sysfs directory * @attr: the batman-adv attribute the user is interacting with * @buff: the buffer that will contain the data to send back to the user @@ -616,8 +618,8 @@ static ssize_t batadv_show_isolation_mark(struct kobject *kobj, } /** - * batadv_store_isolation_mark - parse and store the isolation mark/mask entered - * by the user + * batadv_store_isolation_mark() - parse and store the isolation mark/mask + * entered by the user * @kobj: kobject representing the private mesh sysfs directory * @attr: the batman-adv attribute the user is interacting with * @buff: the buffer containing the user data @@ -733,6 +735,12 @@ static struct batadv_attribute *batadv_vlan_attrs[] = { NULL, }; +/** + * batadv_sysfs_add_meshif() - Add soft interface specific sysfs entries + * @dev: netdev struct of the soft interface + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_sysfs_add_meshif(struct net_device *dev) { struct kobject *batif_kobject = &dev->dev.kobj; @@ -773,6 +781,10 @@ out: return -ENOMEM; } +/** + * batadv_sysfs_del_meshif() - Remove soft interface specific sysfs entries + * @dev: netdev struct of the soft interface + */ void batadv_sysfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); @@ -788,7 +800,7 @@ void batadv_sysfs_del_meshif(struct net_device *dev) } /** - * batadv_sysfs_add_vlan - add all the needed sysfs objects for the new vlan + * batadv_sysfs_add_vlan() - add all the needed sysfs objects for the new vlan * @dev: netdev of the mesh interface * @vlan: private data of the newly added VLAN interface * @@ -849,7 +861,7 @@ out: } /** - * batadv_sysfs_del_vlan - remove all the sysfs objects for a given VLAN + * batadv_sysfs_del_vlan() - remove all the sysfs objects for a given VLAN * @bat_priv: the bat priv with all the soft interface information * @vlan: the private data of the VLAN to destroy */ @@ -894,7 +906,7 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj, } /** - * batadv_store_mesh_iface_finish - store new hardif mesh_iface state + * batadv_store_mesh_iface_finish() - store new hardif mesh_iface state * @net_dev: netdevice to add/remove to/from batman-adv soft-interface * @ifname: name of soft-interface to modify * @@ -947,7 +959,7 @@ out: } /** - * batadv_store_mesh_iface_work - store new hardif mesh_iface state + * batadv_store_mesh_iface_work() - store new hardif mesh_iface state * @work: work queue item * * Changes the parts of the hard+soft interface which can not be modified under @@ -1043,7 +1055,7 @@ static ssize_t batadv_show_iface_status(struct kobject *kobj, #ifdef CONFIG_BATMAN_ADV_BATMAN_V /** - * batadv_store_throughput_override - parse and store throughput override + * batadv_store_throughput_override() - parse and store throughput override * entered by the user * @kobj: kobject representing the private mesh sysfs directory * @attr: the batman-adv attribute the user is interacting with @@ -1130,6 +1142,13 @@ static struct batadv_attribute *batadv_batman_attrs[] = { NULL, }; +/** + * batadv_sysfs_add_hardif() - Add hard interface specific sysfs entries + * @hardif_obj: address where to store the pointer to new sysfs folder + * @dev: netdev struct of the hard interface + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev) { struct kobject *hardif_kobject = &dev->dev.kobj; @@ -1164,6 +1183,11 @@ out: return -ENOMEM; } +/** + * batadv_sysfs_del_hardif() - Remove hard interface specific sysfs entries + * @hardif_obj: address to the pointer to which stores batman-adv sysfs folder + * of the hard interface + */ void batadv_sysfs_del_hardif(struct kobject **hardif_obj) { kobject_uevent(*hardif_obj, KOBJ_REMOVE); @@ -1172,6 +1196,16 @@ void batadv_sysfs_del_hardif(struct kobject **hardif_obj) *hardif_obj = NULL; } +/** + * batadv_throw_uevent() - Send an uevent with batman-adv specific env data + * @bat_priv: the bat priv with all the soft interface information + * @type: subsystem type of event. Stored in uevent's BATTYPE + * @action: action type of event. Stored in uevent's BATACTION + * @data: string with additional information to the event (ignored for + * BATADV_UEV_DEL). Stored in uevent's BATDATA + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, enum batadv_uev_action action, const char *data) { diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index e487412e256b..bbeee61221fa 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@ -35,10 +36,23 @@ struct net_device; */ #define BATADV_SYSFS_VLAN_SUBDIR_PREFIX "vlan" +/** + * struct batadv_attribute - sysfs export helper for batman-adv attributes + */ struct batadv_attribute { + /** @attr: sysfs attribute file */ struct attribute attr; + + /** + * @show: function to export the current attribute's content to sysfs + */ ssize_t (*show)(struct kobject *kobj, struct attribute *attr, char *buf); + + /** + * @store: function to load new value from character buffer and save it + * in batman-adv attribute + */ ssize_t (*store)(struct kobject *kobj, struct attribute *attr, char *buf, size_t count); }; diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index ebc4e2241c77..8b576712d0c1 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: * * Edo Monticelli, Antonio Quartulli @@ -19,13 +20,13 @@ #include "main.h" #include <linux/atomic.h> -#include <linux/bug.h> +#include <linux/build_bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> #include <linux/err.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> @@ -48,13 +49,13 @@ #include <linux/timer.h> #include <linux/wait.h> #include <linux/workqueue.h> +#include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "hard-interface.h" #include "log.h" #include "netlink.h" #include "originator.h" -#include "packet.h" #include "send.h" /** @@ -97,7 +98,7 @@ static u8 batadv_tp_prerandom[4096] __read_mostly; /** - * batadv_tp_session_cookie - generate session cookie based on session ids + * batadv_tp_session_cookie() - generate session cookie based on session ids * @session: TP session identifier * @icmp_uid: icmp pseudo uid of the tp session * @@ -115,7 +116,7 @@ static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid) } /** - * batadv_tp_cwnd - compute the new cwnd size + * batadv_tp_cwnd() - compute the new cwnd size * @base: base cwnd size value * @increment: the value to add to base to get the new size * @min: minumim cwnd value (usually MSS) @@ -140,7 +141,7 @@ static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min) } /** - * batadv_tp_updated_cwnd - update the Congestion Windows + * batadv_tp_updated_cwnd() - update the Congestion Windows * @tp_vars: the private data of the current TP meter session * @mss: maximum segment size of transmission * @@ -176,7 +177,7 @@ static void batadv_tp_update_cwnd(struct batadv_tp_vars *tp_vars, u32 mss) } /** - * batadv_tp_update_rto - calculate new retransmission timeout + * batadv_tp_update_rto() - calculate new retransmission timeout * @tp_vars: the private data of the current TP meter session * @new_rtt: new roundtrip time in msec */ @@ -212,7 +213,7 @@ static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars, } /** - * batadv_tp_batctl_notify - send client status result to client + * batadv_tp_batctl_notify() - send client status result to client * @reason: reason for tp meter session stop * @dst: destination of tp_meter session * @bat_priv: the bat priv with all the soft interface information @@ -244,7 +245,7 @@ static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason, } /** - * batadv_tp_batctl_error_notify - send client error result to client + * batadv_tp_batctl_error_notify() - send client error result to client * @reason: reason for tp meter session stop * @dst: destination of tp_meter session * @bat_priv: the bat priv with all the soft interface information @@ -259,7 +260,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, } /** - * batadv_tp_list_find - find a tp_vars object in the global list + * batadv_tp_list_find() - find a tp_vars object in the global list * @bat_priv: the bat priv with all the soft interface information * @dst: the other endpoint MAC address to look for * @@ -294,7 +295,8 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, } /** - * batadv_tp_list_find_session - find tp_vars session object in the global list + * batadv_tp_list_find_session() - find tp_vars session object in the global + * list * @bat_priv: the bat priv with all the soft interface information * @dst: the other endpoint MAC address to look for * @session: session identifier @@ -335,7 +337,7 @@ batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst, } /** - * batadv_tp_vars_release - release batadv_tp_vars from lists and queue for + * batadv_tp_vars_release() - release batadv_tp_vars from lists and queue for * free after rcu grace period * @ref: kref pointer of the batadv_tp_vars */ @@ -360,7 +362,7 @@ static void batadv_tp_vars_release(struct kref *ref) } /** - * batadv_tp_vars_put - decrement the batadv_tp_vars refcounter and possibly + * batadv_tp_vars_put() - decrement the batadv_tp_vars refcounter and possibly * release it * @tp_vars: the private data of the current TP meter session to be free'd */ @@ -370,7 +372,7 @@ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars) } /** - * batadv_tp_sender_cleanup - cleanup sender data and drop and timer + * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer * @bat_priv: the bat priv with all the soft interface information * @tp_vars: the private data of the current TP meter session to cleanup */ @@ -400,7 +402,7 @@ static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, } /** - * batadv_tp_sender_end - print info about ended session and inform client + * batadv_tp_sender_end() - print info about ended session and inform client * @bat_priv: the bat priv with all the soft interface information * @tp_vars: the private data of the current TP meter session */ @@ -433,7 +435,7 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, } /** - * batadv_tp_sender_shutdown - let sender thread/timer stop gracefully + * batadv_tp_sender_shutdown() - let sender thread/timer stop gracefully * @tp_vars: the private data of the current TP meter session * @reason: reason for tp meter session stop */ @@ -447,7 +449,7 @@ static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars, } /** - * batadv_tp_sender_finish - stop sender session after test_length was reached + * batadv_tp_sender_finish() - stop sender session after test_length was reached * @work: delayed work reference of the related tp_vars */ static void batadv_tp_sender_finish(struct work_struct *work) @@ -463,7 +465,7 @@ static void batadv_tp_sender_finish(struct work_struct *work) } /** - * batadv_tp_reset_sender_timer - reschedule the sender timer + * batadv_tp_reset_sender_timer() - reschedule the sender timer * @tp_vars: the private TP meter data for this session * * Reschedule the timer using tp_vars->rto as delay @@ -481,7 +483,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars) } /** - * batadv_tp_sender_timeout - timer that fires in case of packet loss + * batadv_tp_sender_timeout() - timer that fires in case of packet loss * @t: address to timer_list inside tp_vars * * If fired it means that there was packet loss. @@ -531,7 +533,7 @@ static void batadv_tp_sender_timeout(struct timer_list *t) } /** - * batadv_tp_fill_prerandom - Fill buffer with prefetched random bytes + * batadv_tp_fill_prerandom() - Fill buffer with prefetched random bytes * @tp_vars: the private TP meter data for this session * @buf: Buffer to fill with bytes * @nbytes: amount of pseudorandom bytes @@ -563,7 +565,7 @@ static void batadv_tp_fill_prerandom(struct batadv_tp_vars *tp_vars, } /** - * batadv_tp_send_msg - send a single message + * batadv_tp_send_msg() - send a single message * @tp_vars: the private TP meter data for this session * @src: source mac address * @orig_node: the originator of the destination @@ -623,7 +625,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src, } /** - * batadv_tp_recv_ack - ACK receiving function + * batadv_tp_recv_ack() - ACK receiving function * @bat_priv: the bat priv with all the soft interface information * @skb: the buffer containing the received packet * @@ -765,7 +767,7 @@ out: } /** - * batadv_tp_avail - check if congestion window is not full + * batadv_tp_avail() - check if congestion window is not full * @tp_vars: the private data of the current TP meter session * @payload_len: size of the payload of a single message * @@ -783,7 +785,7 @@ static bool batadv_tp_avail(struct batadv_tp_vars *tp_vars, } /** - * batadv_tp_wait_available - wait until congestion window becomes free or + * batadv_tp_wait_available() - wait until congestion window becomes free or * timeout is reached * @tp_vars: the private data of the current TP meter session * @plen: size of the payload of a single message @@ -805,7 +807,7 @@ static int batadv_tp_wait_available(struct batadv_tp_vars *tp_vars, size_t plen) } /** - * batadv_tp_send - main sending thread of a tp meter session + * batadv_tp_send() - main sending thread of a tp meter session * @arg: address of the related tp_vars * * Return: nothing, this function never returns @@ -904,7 +906,8 @@ out: } /** - * batadv_tp_start_kthread - start new thread which manages the tp meter sender + * batadv_tp_start_kthread() - start new thread which manages the tp meter + * sender * @tp_vars: the private data of the current TP meter session */ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars) @@ -935,7 +938,7 @@ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars) } /** - * batadv_tp_start - start a new tp meter session + * batadv_tp_start() - start a new tp meter session * @bat_priv: the bat priv with all the soft interface information * @dst: the receiver MAC address * @test_length: test length in milliseconds @@ -1060,7 +1063,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, } /** - * batadv_tp_stop - stop currently running tp meter session + * batadv_tp_stop() - stop currently running tp meter session * @bat_priv: the bat priv with all the soft interface information * @dst: the receiver MAC address * @return_value: reason for tp meter session stop @@ -1092,7 +1095,7 @@ out: } /** - * batadv_tp_reset_receiver_timer - reset the receiver shutdown timer + * batadv_tp_reset_receiver_timer() - reset the receiver shutdown timer * @tp_vars: the private data of the current TP meter session * * start the receiver shutdown timer or reset it if already started @@ -1104,7 +1107,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars) } /** - * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is + * batadv_tp_receiver_shutdown() - stop a tp meter receiver when timeout is * reached without received ack * @t: address to timer_list inside tp_vars */ @@ -1149,7 +1152,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) } /** - * batadv_tp_send_ack - send an ACK packet + * batadv_tp_send_ack() - send an ACK packet * @bat_priv: the bat priv with all the soft interface information * @dst: the mac address of the destination originator * @seq: the sequence number to ACK @@ -1221,7 +1224,7 @@ out: } /** - * batadv_tp_handle_out_of_order - store an out of order packet + * batadv_tp_handle_out_of_order() - store an out of order packet * @tp_vars: the private data of the current TP meter session * @skb: the buffer containing the received packet * @@ -1297,7 +1300,7 @@ out: } /** - * batadv_tp_ack_unordered - update number received bytes in current stream + * batadv_tp_ack_unordered() - update number received bytes in current stream * without gaps * @tp_vars: the private data of the current TP meter session */ @@ -1330,7 +1333,7 @@ static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars) } /** - * batadv_tp_init_recv - return matching or create new receiver tp_vars + * batadv_tp_init_recv() - return matching or create new receiver tp_vars * @bat_priv: the bat priv with all the soft interface information * @icmp: received icmp tp msg * @@ -1383,7 +1386,7 @@ out_unlock: } /** - * batadv_tp_recv_msg - process a single data message + * batadv_tp_recv_msg() - process a single data message * @bat_priv: the bat priv with all the soft interface information * @skb: the buffer containing the received packet * @@ -1468,7 +1471,7 @@ out: } /** - * batadv_tp_meter_recv - main TP Meter receiving function + * batadv_tp_meter_recv() - main TP Meter receiving function * @bat_priv: the bat priv with all the soft interface information * @skb: the buffer containing the received packet */ @@ -1494,7 +1497,7 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) } /** - * batadv_tp_meter_init - initialize global tp_meter structures + * batadv_tp_meter_init() - initialize global tp_meter structures */ void __init batadv_tp_meter_init(void) { diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h index a8ada5c123bd..c8b8f2cb2c2b 100644 --- a/net/batman-adv/tp_meter.h +++ b/net/batman-adv/tp_meter.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors: * * Edo Monticelli, Antonio Quartulli diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 8a3ce79b1307..7550a9ccd695 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli @@ -20,14 +21,14 @@ #include <linux/atomic.h> #include <linux/bitops.h> -#include <linux/bug.h> +#include <linux/build_bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> #include <linux/crc32c.h> #include <linux/errno.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jhash.h> @@ -36,6 +37,7 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> +#include <linux/net.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rculist.h> @@ -50,6 +52,7 @@ #include <net/genetlink.h> #include <net/netlink.h> #include <net/sock.h> +#include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bridge_loop_avoidance.h" @@ -58,7 +61,6 @@ #include "log.h" #include "netlink.h" #include "originator.h" -#include "packet.h" #include "soft-interface.h" #include "tvlv.h" @@ -86,7 +88,7 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, bool roaming); /** - * batadv_compare_tt - check if two TT entries are the same + * batadv_compare_tt() - check if two TT entries are the same * @node: the list element pointer of the first TT entry * @data2: pointer to the tt_common_entry of the second TT entry * @@ -105,7 +107,7 @@ static bool batadv_compare_tt(const struct hlist_node *node, const void *data2) } /** - * batadv_choose_tt - return the index of the tt entry in the hash table + * batadv_choose_tt() - return the index of the tt entry in the hash table * @data: pointer to the tt_common_entry object to map * @size: the size of the hash table * @@ -125,7 +127,7 @@ static inline u32 batadv_choose_tt(const void *data, u32 size) } /** - * batadv_tt_hash_find - look for a client in the given hash table + * batadv_tt_hash_find() - look for a client in the given hash table * @hash: the hash table to search * @addr: the mac address of the client to look for * @vid: VLAN identifier @@ -170,7 +172,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr, } /** - * batadv_tt_local_hash_find - search the local table for a given client + * batadv_tt_local_hash_find() - search the local table for a given client * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to look for * @vid: VLAN identifier @@ -195,7 +197,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr, } /** - * batadv_tt_global_hash_find - search the global table for a given client + * batadv_tt_global_hash_find() - search the global table for a given client * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to look for * @vid: VLAN identifier @@ -220,7 +222,7 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, } /** - * batadv_tt_local_entry_free_rcu - free the tt_local_entry + * batadv_tt_local_entry_free_rcu() - free the tt_local_entry * @rcu: rcu pointer of the tt_local_entry */ static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) @@ -234,7 +236,7 @@ static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) } /** - * batadv_tt_local_entry_release - release tt_local_entry from lists and queue + * batadv_tt_local_entry_release() - release tt_local_entry from lists and queue * for free after rcu grace period * @ref: kref pointer of the nc_node */ @@ -251,7 +253,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) } /** - * batadv_tt_local_entry_put - decrement the tt_local_entry refcounter and + * batadv_tt_local_entry_put() - decrement the tt_local_entry refcounter and * possibly release it * @tt_local_entry: tt_local_entry to be free'd */ @@ -263,7 +265,7 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) } /** - * batadv_tt_global_entry_free_rcu - free the tt_global_entry + * batadv_tt_global_entry_free_rcu() - free the tt_global_entry * @rcu: rcu pointer of the tt_global_entry */ static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) @@ -277,8 +279,8 @@ static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) } /** - * batadv_tt_global_entry_release - release tt_global_entry from lists and queue - * for free after rcu grace period + * batadv_tt_global_entry_release() - release tt_global_entry from lists and + * queue for free after rcu grace period * @ref: kref pointer of the nc_node */ static void batadv_tt_global_entry_release(struct kref *ref) @@ -294,7 +296,7 @@ static void batadv_tt_global_entry_release(struct kref *ref) } /** - * batadv_tt_global_entry_put - decrement the tt_global_entry refcounter and + * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and * possibly release it * @tt_global_entry: tt_global_entry to be free'd */ @@ -306,7 +308,7 @@ batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry) } /** - * batadv_tt_global_hash_count - count the number of orig entries + * batadv_tt_global_hash_count() - count the number of orig entries * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to count entries for * @vid: VLAN identifier @@ -331,8 +333,8 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_size_mod - change the size by v of the local table identified - * by vid + * batadv_tt_local_size_mod() - change the size by v of the local table + * identified by vid * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier of the sub-table to change * @v: the amount to sum to the local table size @@ -352,8 +354,8 @@ static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_size_inc - increase by one the local table size for the given - * vid + * batadv_tt_local_size_inc() - increase by one the local table size for the + * given vid * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier */ @@ -364,8 +366,8 @@ static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_size_dec - decrease by one the local table size for the given - * vid + * batadv_tt_local_size_dec() - decrease by one the local table size for the + * given vid * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier */ @@ -376,7 +378,7 @@ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv, } /** - * batadv_tt_global_size_mod - change the size by v of the global table + * batadv_tt_global_size_mod() - change the size by v of the global table * for orig_node identified by vid * @orig_node: the originator for which the table has to be modified * @vid: the VLAN identifier @@ -404,7 +406,7 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node, } /** - * batadv_tt_global_size_inc - increase by one the global table size for the + * batadv_tt_global_size_inc() - increase by one the global table size for the * given vid * @orig_node: the originator which global table size has to be decreased * @vid: the vlan identifier @@ -416,7 +418,7 @@ static void batadv_tt_global_size_inc(struct batadv_orig_node *orig_node, } /** - * batadv_tt_global_size_dec - decrease by one the global table size for the + * batadv_tt_global_size_dec() - decrease by one the global table size for the * given vid * @orig_node: the originator which global table size has to be decreased * @vid: the vlan identifier @@ -428,7 +430,7 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, } /** - * batadv_tt_orig_list_entry_free_rcu - free the orig_entry + * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry * @rcu: rcu pointer of the orig_entry */ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) @@ -441,7 +443,7 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) } /** - * batadv_tt_orig_list_entry_release - release tt orig entry from lists and + * batadv_tt_orig_list_entry_release() - release tt orig entry from lists and * queue for free after rcu grace period * @ref: kref pointer of the tt orig entry */ @@ -457,7 +459,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref) } /** - * batadv_tt_orig_list_entry_put - decrement the tt orig entry refcounter and + * batadv_tt_orig_list_entry_put() - decrement the tt orig entry refcounter and * possibly release it * @orig_entry: tt orig entry to be free'd */ @@ -468,7 +470,7 @@ batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry) } /** - * batadv_tt_local_event - store a local TT event (ADD/DEL) + * batadv_tt_local_event() - store a local TT event (ADD/DEL) * @bat_priv: the bat priv with all the soft interface information * @tt_local_entry: the TT entry involved in the event * @event_flags: flags to store in the event structure @@ -543,7 +545,7 @@ unlock: } /** - * batadv_tt_len - compute length in bytes of given number of tt changes + * batadv_tt_len() - compute length in bytes of given number of tt changes * @changes_num: number of tt changes * * Return: computed length in bytes. @@ -554,7 +556,7 @@ static int batadv_tt_len(int changes_num) } /** - * batadv_tt_entries - compute the number of entries fitting in tt_len bytes + * batadv_tt_entries() - compute the number of entries fitting in tt_len bytes * @tt_len: available space * * Return: the number of entries. @@ -565,8 +567,8 @@ static u16 batadv_tt_entries(u16 tt_len) } /** - * batadv_tt_local_table_transmit_size - calculates the local translation table - * size when transmitted over the air + * batadv_tt_local_table_transmit_size() - calculates the local translation + * table size when transmitted over the air * @bat_priv: the bat priv with all the soft interface information * * Return: local translation table size in bytes. @@ -625,7 +627,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_add - add a new client to the local table or update an + * batadv_tt_local_add() - add a new client to the local table or update an * existing client * @soft_iface: netdev struct of the mesh interface * @addr: the mac address of the client to add @@ -830,7 +832,7 @@ out: } /** - * batadv_tt_prepare_tvlv_global_data - prepare the TVLV TT header to send + * batadv_tt_prepare_tvlv_global_data() - prepare the TVLV TT header to send * within a TT Response directed to another node * @orig_node: originator for which the TT data has to be prepared * @tt_data: uninitialised pointer to the address of the TVLV buffer @@ -903,8 +905,8 @@ out: } /** - * batadv_tt_prepare_tvlv_local_data - allocate and prepare the TT TVLV for this - * node + * batadv_tt_prepare_tvlv_local_data() - allocate and prepare the TT TVLV for + * this node * @bat_priv: the bat priv with all the soft interface information * @tt_data: uninitialised pointer to the address of the TVLV buffer * @tt_change: uninitialised pointer to the address of the area where the TT @@ -977,8 +979,8 @@ out: } /** - * batadv_tt_tvlv_container_update - update the translation table tvlv container - * after local tt changes have been committed + * batadv_tt_tvlv_container_update() - update the translation table tvlv + * container after local tt changes have been committed * @bat_priv: the bat priv with all the soft interface information */ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) @@ -1053,6 +1055,14 @@ container_register: } #ifdef CONFIG_BATMAN_ADV_DEBUGFS + +/** + * batadv_tt_local_seq_print_text() - Print the local tt table in a seq file + * @seq: seq file to print on + * @offset: not used + * + * Return: always 0 + */ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1123,7 +1133,7 @@ out: #endif /** - * batadv_tt_local_dump_entry - Dump one TT local entry into a message + * batadv_tt_local_dump_entry() - Dump one TT local entry into a message * @msg :Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -1179,7 +1189,7 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message + * batadv_tt_local_dump_bucket() - Dump one TT local bucket into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -1216,7 +1226,7 @@ batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_tt_local_dump - Dump TT local entries into a message + * batadv_tt_local_dump() - Dump TT local entries into a message * @msg: Netlink message to dump into * @cb: Parameters from query * @@ -1300,7 +1310,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_remove - logically remove an entry from the local table + * batadv_tt_local_remove() - logically remove an entry from the local table * @bat_priv: the bat priv with all the soft interface information * @addr: the MAC address of the client to remove * @vid: VLAN identifier @@ -1362,7 +1372,7 @@ out: } /** - * batadv_tt_local_purge_list - purge inactive tt local entries + * batadv_tt_local_purge_list() - purge inactive tt local entries * @bat_priv: the bat priv with all the soft interface information * @head: pointer to the list containing the local tt entries * @timeout: parameter deciding whether a given tt local entry is considered @@ -1397,7 +1407,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_purge - purge inactive tt local entries + * batadv_tt_local_purge() - purge inactive tt local entries * @bat_priv: the bat priv with all the soft interface information * @timeout: parameter deciding whether a given tt local entry is considered * inactive or not @@ -1490,7 +1500,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) } /** - * batadv_tt_global_orig_entry_find - find a TT orig_list_entry + * batadv_tt_global_orig_entry_find() - find a TT orig_list_entry * @entry: the TT global entry where the orig_list_entry has to be * extracted from * @orig_node: the originator for which the orig_list_entry has to be found @@ -1524,8 +1534,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, } /** - * batadv_tt_global_entry_has_orig - check if a TT global entry is also handled - * by a given originator + * batadv_tt_global_entry_has_orig() - check if a TT global entry is also + * handled by a given originator * @entry: the TT global entry to check * @orig_node: the originator to search in the list * @@ -1550,7 +1560,7 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, } /** - * batadv_tt_global_sync_flags - update TT sync flags + * batadv_tt_global_sync_flags() - update TT sync flags * @tt_global: the TT global entry to update sync flags in * * Updates the sync flag bits in the tt_global flag attribute with a logical @@ -1574,7 +1584,7 @@ batadv_tt_global_sync_flags(struct batadv_tt_global_entry *tt_global) } /** - * batadv_tt_global_orig_entry_add - add or update a TT orig entry + * batadv_tt_global_orig_entry_add() - add or update a TT orig entry * @tt_global: the TT global entry to add an orig entry in * @orig_node: the originator to add an orig entry for * @ttvn: translation table version number of this changeset @@ -1624,7 +1634,7 @@ out: } /** - * batadv_tt_global_add - add a new TT global entry or update an existing one + * batadv_tt_global_add() - add a new TT global entry or update an existing one * @bat_priv: the bat priv with all the soft interface information * @orig_node: the originator announcing the client * @tt_addr: the mac address of the non-mesh client @@ -1796,7 +1806,7 @@ out: } /** - * batadv_transtable_best_orig - Get best originator list entry from tt entry + * batadv_transtable_best_orig() - Get best originator list entry from tt entry * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: global translation table entry to be analyzed * @@ -1842,8 +1852,8 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** - * batadv_tt_global_print_entry - print all orig nodes who announce the address - * for this global entry + * batadv_tt_global_print_entry() - print all orig nodes who announce the + * address for this global entry * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: global translation table entry to be printed * @seq: debugfs table seq_file struct @@ -1925,6 +1935,13 @@ print_list: } } +/** + * batadv_tt_global_seq_print_text() - Print the global tt table in a seq file + * @seq: seq file to print on + * @offset: not used + * + * Return: always 0 + */ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1967,7 +1984,7 @@ out: #endif /** - * batadv_tt_global_dump_subentry - Dump all TT local entries into a message + * batadv_tt_global_dump_subentry() - Dump all TT local entries into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -2028,7 +2045,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_tt_global_dump_entry - Dump one TT global entry into a message + * batadv_tt_global_dump_entry() - Dump one TT global entry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -2073,7 +2090,7 @@ batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_tt_global_dump_bucket - Dump one TT local bucket into a message + * batadv_tt_global_dump_bucket() - Dump one TT local bucket into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message @@ -2112,7 +2129,7 @@ batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, } /** - * batadv_tt_global_dump - Dump TT global entries into a message + * batadv_tt_global_dump() - Dump TT global entries into a message * @msg: Netlink message to dump into * @cb: Parameters from query * @@ -2180,7 +2197,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) } /** - * _batadv_tt_global_del_orig_entry - remove and free an orig_entry + * _batadv_tt_global_del_orig_entry() - remove and free an orig_entry * @tt_global_entry: the global entry to remove the orig_entry from * @orig_entry: the orig entry to remove and free * @@ -2222,7 +2239,7 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) } /** - * batadv_tt_global_del_orig_node - remove orig_node from a global tt entry + * batadv_tt_global_del_orig_node() - remove orig_node from a global tt entry * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: the global entry to remove the orig_node from * @orig_node: the originator announcing the client @@ -2301,7 +2318,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, } /** - * batadv_tt_global_del - remove a client from the global table + * batadv_tt_global_del() - remove a client from the global table * @bat_priv: the bat priv with all the soft interface information * @orig_node: an originator serving this client * @addr: the mac address of the client @@ -2367,8 +2384,8 @@ out: } /** - * batadv_tt_global_del_orig - remove all the TT global entries belonging to the - * given originator matching the provided vid + * batadv_tt_global_del_orig() - remove all the TT global entries belonging to + * the given originator matching the provided vid * @bat_priv: the bat priv with all the soft interface information * @orig_node: the originator owning the entries to remove * @match_vid: the VLAN identifier to match. If negative all the entries will be @@ -2539,7 +2556,7 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, } /** - * batadv_transtable_search - get the mesh destination for a given client + * batadv_transtable_search() - get the mesh destination for a given client * @bat_priv: the bat priv with all the soft interface information * @src: mac address of the source client * @addr: mac address of the destination client @@ -2599,7 +2616,7 @@ out: } /** - * batadv_tt_global_crc - calculates the checksum of the local table belonging + * batadv_tt_global_crc() - calculates the checksum of the local table belonging * to the given orig_node * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator for which the CRC should be computed @@ -2694,7 +2711,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_crc - calculates the checksum of the local table + * batadv_tt_local_crc() - calculates the checksum of the local table * @bat_priv: the bat priv with all the soft interface information * @vid: VLAN identifier for which the CRC32 has to be computed * @@ -2751,7 +2768,7 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, } /** - * batadv_tt_req_node_release - free tt_req node entry + * batadv_tt_req_node_release() - free tt_req node entry * @ref: kref pointer of the tt req_node entry */ static void batadv_tt_req_node_release(struct kref *ref) @@ -2764,7 +2781,7 @@ static void batadv_tt_req_node_release(struct kref *ref) } /** - * batadv_tt_req_node_put - decrement the tt_req_node refcounter and + * batadv_tt_req_node_put() - decrement the tt_req_node refcounter and * possibly release it * @tt_req_node: tt_req_node to be free'd */ @@ -2826,7 +2843,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) } /** - * batadv_tt_req_node_new - search and possibly create a tt_req_node object + * batadv_tt_req_node_new() - search and possibly create a tt_req_node object * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node this request is being issued for * @@ -2863,7 +2880,7 @@ unlock: } /** - * batadv_tt_local_valid - verify that given tt entry is a valid one + * batadv_tt_local_valid() - verify that given tt entry is a valid one * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype * @@ -2897,7 +2914,7 @@ static bool batadv_tt_global_valid(const void *entry_ptr, } /** - * batadv_tt_tvlv_generate - fill the tvlv buff with the tt entries from the + * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from the * specified tt hash * @bat_priv: the bat priv with all the soft interface information * @hash: hash table containing the tt entries @@ -2948,7 +2965,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, } /** - * batadv_tt_global_check_crc - check if all the CRCs are correct + * batadv_tt_global_check_crc() - check if all the CRCs are correct * @orig_node: originator for which the CRCs have to be checked * @tt_vlan: pointer to the first tvlv VLAN entry * @num_vlan: number of tvlv VLAN entries @@ -3005,7 +3022,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, } /** - * batadv_tt_local_update_crc - update all the local CRCs + * batadv_tt_local_update_crc() - update all the local CRCs * @bat_priv: the bat priv with all the soft interface information */ static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv) @@ -3021,7 +3038,7 @@ static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv) } /** - * batadv_tt_global_update_crc - update all the global CRCs for this orig_node + * batadv_tt_global_update_crc() - update all the global CRCs for this orig_node * @bat_priv: the bat priv with all the soft interface information * @orig_node: the orig_node for which the CRCs have to be updated */ @@ -3048,7 +3065,7 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, } /** - * batadv_send_tt_request - send a TT Request message to a given node + * batadv_send_tt_request() - send a TT Request message to a given node * @bat_priv: the bat priv with all the soft interface information * @dst_orig_node: the destination of the message * @ttvn: the version number that the source of the message is looking for @@ -3137,7 +3154,7 @@ out: } /** - * batadv_send_other_tt_response - send reply to tt request concerning another + * batadv_send_other_tt_response() - send reply to tt request concerning another * node's translation table * @bat_priv: the bat priv with all the soft interface information * @tt_data: tt data containing the tt request information @@ -3270,8 +3287,8 @@ out: } /** - * batadv_send_my_tt_response - send reply to tt request concerning this node's - * translation table + * batadv_send_my_tt_response() - send reply to tt request concerning this + * node's translation table * @bat_priv: the bat priv with all the soft interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender @@ -3388,7 +3405,7 @@ out: } /** - * batadv_send_tt_response - send reply to tt request + * batadv_send_tt_response() - send reply to tt request * @bat_priv: the bat priv with all the soft interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender @@ -3484,7 +3501,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, } /** - * batadv_is_my_client - check if a client is served by the local node + * batadv_is_my_client() - check if a client is served by the local node * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to check * @vid: VLAN identifier @@ -3514,7 +3531,7 @@ out: } /** - * batadv_handle_tt_response - process incoming tt reply + * batadv_handle_tt_response() - process incoming tt reply * @bat_priv: the bat priv with all the soft interface information * @tt_data: tt data containing the tt request information * @resp_src: mac address of tt reply sender @@ -3607,7 +3624,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) } /** - * batadv_tt_check_roam_count - check if a client has roamed too frequently + * batadv_tt_check_roam_count() - check if a client has roamed too frequently * @bat_priv: the bat priv with all the soft interface information * @client: mac address of the roaming client * @@ -3662,7 +3679,7 @@ unlock: } /** - * batadv_send_roam_adv - send a roaming advertisement message + * batadv_send_roam_adv() - send a roaming advertisement message * @bat_priv: the bat priv with all the soft interface information * @client: mac address of the roaming client * @vid: VLAN identifier @@ -3727,6 +3744,10 @@ static void batadv_tt_purge(struct work_struct *work) msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); } +/** + * batadv_tt_free() - Free translation table of soft interface + * @bat_priv: the bat priv with all the soft interface information + */ void batadv_tt_free(struct batadv_priv *bat_priv) { batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_TT, 1); @@ -3744,7 +3765,7 @@ void batadv_tt_free(struct batadv_priv *bat_priv) } /** - * batadv_tt_local_set_flags - set or unset the specified flags on the local + * batadv_tt_local_set_flags() - set or unset the specified flags on the local * table and possibly count them in the TT size * @bat_priv: the bat priv with all the soft interface information * @flags: the flag to switch @@ -3830,7 +3851,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) } /** - * batadv_tt_local_commit_changes_nolock - commit all pending local tt changes + * batadv_tt_local_commit_changes_nolock() - commit all pending local tt changes * which have been queued in the time since the last commit * @bat_priv: the bat priv with all the soft interface information * @@ -3863,7 +3884,7 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) } /** - * batadv_tt_local_commit_changes - commit all pending local tt changes which + * batadv_tt_local_commit_changes() - commit all pending local tt changes which * have been queued in the time since the last commit * @bat_priv: the bat priv with all the soft interface information */ @@ -3874,6 +3895,15 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->tt.commit_lock); } +/** + * batadv_is_ap_isolated() - Check if packet from upper layer should be dropped + * @bat_priv: the bat priv with all the soft interface information + * @src: source mac address of packet + * @dst: destination mac address of packet + * @vid: vlan id of packet + * + * Return: true when src+dst(+vid) pair should be isolated, false otherwise + */ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, unsigned short vid) { @@ -3909,7 +3939,7 @@ vlan_put: } /** - * batadv_tt_update_orig - update global translation table with new tt + * batadv_tt_update_orig() - update global translation table with new tt * information received via ogms * @bat_priv: the bat priv with all the soft interface information * @orig_node: the orig_node of the ogm @@ -3994,7 +4024,7 @@ request_table: } /** - * batadv_tt_global_client_is_roaming - check if a client is marked as roaming + * batadv_tt_global_client_is_roaming() - check if a client is marked as roaming * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to check * @vid: VLAN identifier @@ -4020,7 +4050,7 @@ out: } /** - * batadv_tt_local_client_is_roaming - tells whether the client is roaming + * batadv_tt_local_client_is_roaming() - tells whether the client is roaming * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the local client to query * @vid: VLAN identifier @@ -4045,6 +4075,15 @@ out: return ret; } +/** + * batadv_tt_add_temporary_global_entry() - Add temporary entry to global TT + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which the temporary entry should be associated with + * @addr: mac address of the client + * @vid: VLAN id of the new temporary global translation table + * + * Return: true when temporary tt entry could be added, false otherwise + */ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, @@ -4069,7 +4108,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, } /** - * batadv_tt_local_resize_to_mtu - resize the local translation table fit the + * batadv_tt_local_resize_to_mtu() - resize the local translation table fit the * maximum packet size that can be transported through the mesh * @soft_iface: netdev struct of the mesh interface * @@ -4110,7 +4149,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) } /** - * batadv_tt_tvlv_ogm_handler_v1 - process incoming tt tvlv container + * batadv_tt_tvlv_ogm_handler_v1() - process incoming tt tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) @@ -4149,7 +4188,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, } /** - * batadv_tt_tvlv_unicast_handler_v1 - process incoming (unicast) tt tvlv + * batadv_tt_tvlv_unicast_handler_v1() - process incoming (unicast) tt tvlv * container * @bat_priv: the bat priv with all the soft interface information * @src: mac address of tt tvlv sender @@ -4231,7 +4270,8 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, } /** - * batadv_roam_tvlv_unicast_handler_v1 - process incoming tt roam tvlv container + * batadv_roam_tvlv_unicast_handler_v1() - process incoming tt roam tvlv + * container * @bat_priv: the bat priv with all the soft interface information * @src: mac address of tt tvlv sender * @dst: mac address of tt tvlv recipient @@ -4281,7 +4321,7 @@ out: } /** - * batadv_tt_init - initialise the translation table internals + * batadv_tt_init() - initialise the translation table internals * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or negative error number in case of failure. @@ -4317,7 +4357,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv) } /** - * batadv_tt_global_is_isolated - check if a client is marked as isolated + * batadv_tt_global_is_isolated() - check if a client is marked as isolated * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client * @vid: the identifier of the VLAN where this client is connected @@ -4343,7 +4383,7 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, } /** - * batadv_tt_cache_init - Initialize tt memory object cache + * batadv_tt_cache_init() - Initialize tt memory object cache * * Return: 0 on success or negative error number in case of failure. */ @@ -4412,7 +4452,7 @@ err_tt_tl_destroy: } /** - * batadv_tt_cache_destroy - Destroy tt memory object cache + * batadv_tt_cache_destroy() - Destroy tt memory object cache */ void batadv_tt_cache_destroy(void) { diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 411d586191da..8d9e3abec2c8 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 1d9e267caec9..5ffcb45ac6ff 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -19,7 +20,7 @@ #include <linux/byteorder/generic.h> #include <linux/etherdevice.h> -#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/kernel.h> #include <linux/kref.h> @@ -35,14 +36,14 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> +#include <uapi/linux/batadv_packet.h> #include "originator.h" -#include "packet.h" #include "send.h" #include "tvlv.h" /** - * batadv_tvlv_handler_release - release tvlv handler from lists and queue for + * batadv_tvlv_handler_release() - release tvlv handler from lists and queue for * free after rcu grace period * @ref: kref pointer of the tvlv */ @@ -55,7 +56,7 @@ static void batadv_tvlv_handler_release(struct kref *ref) } /** - * batadv_tvlv_handler_put - decrement the tvlv container refcounter and + * batadv_tvlv_handler_put() - decrement the tvlv container refcounter and * possibly release it * @tvlv_handler: the tvlv handler to free */ @@ -65,7 +66,7 @@ static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler) } /** - * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list + * batadv_tvlv_handler_get() - retrieve tvlv handler from the tvlv handler list * based on the provided type and version (both need to match) * @bat_priv: the bat priv with all the soft interface information * @type: tvlv handler type to look for @@ -99,7 +100,7 @@ batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) } /** - * batadv_tvlv_container_release - release tvlv from lists and free + * batadv_tvlv_container_release() - release tvlv from lists and free * @ref: kref pointer of the tvlv */ static void batadv_tvlv_container_release(struct kref *ref) @@ -111,7 +112,7 @@ static void batadv_tvlv_container_release(struct kref *ref) } /** - * batadv_tvlv_container_put - decrement the tvlv container refcounter and + * batadv_tvlv_container_put() - decrement the tvlv container refcounter and * possibly release it * @tvlv: the tvlv container to free */ @@ -121,7 +122,7 @@ static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv) } /** - * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container + * batadv_tvlv_container_get() - retrieve tvlv container from the tvlv container * list based on the provided type and version (both need to match) * @bat_priv: the bat priv with all the soft interface information * @type: tvlv container type to look for @@ -155,7 +156,7 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) } /** - * batadv_tvlv_container_list_size - calculate the size of the tvlv container + * batadv_tvlv_container_list_size() - calculate the size of the tvlv container * list entries * @bat_priv: the bat priv with all the soft interface information * @@ -180,8 +181,8 @@ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) } /** - * batadv_tvlv_container_remove - remove tvlv container from the tvlv container - * list + * batadv_tvlv_container_remove() - remove tvlv container from the tvlv + * container list * @bat_priv: the bat priv with all the soft interface information * @tvlv: the to be removed tvlv container * @@ -204,7 +205,7 @@ static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_container_unregister - unregister tvlv container based on the + * batadv_tvlv_container_unregister() - unregister tvlv container based on the * provided type and version (both need to match) * @bat_priv: the bat priv with all the soft interface information * @type: tvlv container type to unregister @@ -222,7 +223,7 @@ void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_container_register - register tvlv type, version and content + * batadv_tvlv_container_register() - register tvlv type, version and content * to be propagated with each (primary interface) OGM * @bat_priv: the bat priv with all the soft interface information * @type: tvlv container type @@ -267,7 +268,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate + * batadv_tvlv_realloc_packet_buff() - reallocate packet buffer to accommodate * requested packet size * @packet_buff: packet buffer * @packet_buff_len: packet buffer size @@ -300,7 +301,7 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, } /** - * batadv_tvlv_container_ogm_append - append tvlv container content to given + * batadv_tvlv_container_ogm_append() - append tvlv container content to given * OGM packet buffer * @bat_priv: the bat priv with all the soft interface information * @packet_buff: ogm packet buffer @@ -353,7 +354,7 @@ end: } /** - * batadv_tvlv_call_handler - parse the given tvlv buffer to call the + * batadv_tvlv_call_handler() - parse the given tvlv buffer to call the * appropriate handlers * @bat_priv: the bat priv with all the soft interface information * @tvlv_handler: tvlv callback function handling the tvlv content @@ -407,7 +408,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_containers_process - parse the given tvlv buffer to call the + * batadv_tvlv_containers_process() - parse the given tvlv buffer to call the * appropriate handlers * @bat_priv: the bat priv with all the soft interface information * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet @@ -474,7 +475,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate + * batadv_tvlv_ogm_receive() - process an incoming ogm and call the appropriate * handlers * @bat_priv: the bat priv with all the soft interface information * @batadv_ogm_packet: ogm packet containing the tvlv containers @@ -501,7 +502,7 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_handler_register - register tvlv handler based on the provided + * batadv_tvlv_handler_register() - register tvlv handler based on the provided * type and version (both need to match) for ogm tvlv payload and/or unicast * payload * @bat_priv: the bat priv with all the soft interface information @@ -556,7 +557,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_handler_unregister - unregister tvlv handler based on the + * batadv_tvlv_handler_unregister() - unregister tvlv handler based on the * provided type and version (both need to match) * @bat_priv: the bat priv with all the soft interface information * @type: tvlv handler type to be unregistered @@ -579,7 +580,7 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the + * batadv_tvlv_unicast_send() - send a unicast packet with tvlv payload to the * specified host * @bat_priv: the bat priv with all the soft interface information * @src: source mac address of the unicast packet diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index 4d01400ada30..a74df33f446d 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a62795868794..bb1578410e0c 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich @@ -34,10 +35,9 @@ #include <linux/types.h> #include <linux/wait.h> #include <linux/workqueue.h> +#include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> -#include "packet.h" - struct seq_file; #ifdef CONFIG_BATMAN_ADV_DAT @@ -54,13 +54,15 @@ struct seq_file; /** * enum batadv_dhcp_recipient - dhcp destination - * @BATADV_DHCP_NO: packet is not a dhcp message - * @BATADV_DHCP_TO_SERVER: dhcp message is directed to a server - * @BATADV_DHCP_TO_CLIENT: dhcp message is directed to a client */ enum batadv_dhcp_recipient { + /** @BATADV_DHCP_NO: packet is not a dhcp message */ BATADV_DHCP_NO = 0, + + /** @BATADV_DHCP_TO_SERVER: dhcp message is directed to a server */ BATADV_DHCP_TO_SERVER, + + /** @BATADV_DHCP_TO_CLIENT: dhcp message is directed to a client */ BATADV_DHCP_TO_CLIENT, }; @@ -78,196 +80,274 @@ enum batadv_dhcp_recipient { /** * struct batadv_hard_iface_bat_iv - per hard-interface B.A.T.M.A.N. IV data - * @ogm_buff: buffer holding the OGM packet - * @ogm_buff_len: length of the OGM packet buffer - * @ogm_seqno: OGM sequence number - used to identify each OGM */ struct batadv_hard_iface_bat_iv { + /** @ogm_buff: buffer holding the OGM packet */ unsigned char *ogm_buff; + + /** @ogm_buff_len: length of the OGM packet buffer */ int ogm_buff_len; + + /** @ogm_seqno: OGM sequence number - used to identify each OGM */ atomic_t ogm_seqno; }; /** * enum batadv_v_hard_iface_flags - interface flags useful to B.A.T.M.A.N. V - * @BATADV_FULL_DUPLEX: tells if the connection over this link is full-duplex - * @BATADV_WARNING_DEFAULT: tells whether we have warned the user that no - * throughput data is available for this interface and that default values are - * assumed. */ enum batadv_v_hard_iface_flags { + /** + * @BATADV_FULL_DUPLEX: tells if the connection over this link is + * full-duplex + */ BATADV_FULL_DUPLEX = BIT(0), + + /** + * @BATADV_WARNING_DEFAULT: tells whether we have warned the user that + * no throughput data is available for this interface and that default + * values are assumed. + */ BATADV_WARNING_DEFAULT = BIT(1), }; /** * struct batadv_hard_iface_bat_v - per hard-interface B.A.T.M.A.N. V data - * @elp_interval: time interval between two ELP transmissions - * @elp_seqno: current ELP sequence number - * @elp_skb: base skb containing the ELP message to send - * @elp_wq: workqueue used to schedule ELP transmissions - * @throughput_override: throughput override to disable link auto-detection - * @flags: interface specific flags */ struct batadv_hard_iface_bat_v { + /** @elp_interval: time interval between two ELP transmissions */ atomic_t elp_interval; + + /** @elp_seqno: current ELP sequence number */ atomic_t elp_seqno; + + /** @elp_skb: base skb containing the ELP message to send */ struct sk_buff *elp_skb; + + /** @elp_wq: workqueue used to schedule ELP transmissions */ struct delayed_work elp_wq; + + /** + * @throughput_override: throughput override to disable link + * auto-detection + */ atomic_t throughput_override; + + /** @flags: interface specific flags */ u8 flags; }; /** * enum batadv_hard_iface_wifi_flags - Flags describing the wifi configuration * of a batadv_hard_iface - * @BATADV_HARDIF_WIFI_WEXT_DIRECT: it is a wext wifi device - * @BATADV_HARDIF_WIFI_CFG80211_DIRECT: it is a cfg80211 wifi device - * @BATADV_HARDIF_WIFI_WEXT_INDIRECT: link device is a wext wifi device - * @BATADV_HARDIF_WIFI_CFG80211_INDIRECT: link device is a cfg80211 wifi device */ enum batadv_hard_iface_wifi_flags { + /** @BATADV_HARDIF_WIFI_WEXT_DIRECT: it is a wext wifi device */ BATADV_HARDIF_WIFI_WEXT_DIRECT = BIT(0), + + /** @BATADV_HARDIF_WIFI_CFG80211_DIRECT: it is a cfg80211 wifi device */ BATADV_HARDIF_WIFI_CFG80211_DIRECT = BIT(1), + + /** + * @BATADV_HARDIF_WIFI_WEXT_INDIRECT: link device is a wext wifi device + */ BATADV_HARDIF_WIFI_WEXT_INDIRECT = BIT(2), + + /** + * @BATADV_HARDIF_WIFI_CFG80211_INDIRECT: link device is a cfg80211 wifi + * device + */ BATADV_HARDIF_WIFI_CFG80211_INDIRECT = BIT(3), }; /** * struct batadv_hard_iface - network device known to batman-adv - * @list: list node for batadv_hardif_list - * @if_num: identificator of the interface - * @if_status: status of the interface for batman-adv - * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) - * @wifi_flags: flags whether this is (directly or indirectly) a wifi interface - * @net_dev: pointer to the net_device - * @hardif_obj: kobject of the per interface sysfs "mesh" directory - * @refcount: number of contexts the object is used - * @batman_adv_ptype: packet type describing packets that should be processed by - * batman-adv for this interface - * @soft_iface: the batman-adv interface which uses this network interface - * @rcu: struct used for freeing in an RCU-safe manner - * @bat_iv: per hard-interface B.A.T.M.A.N. IV data - * @bat_v: per hard-interface B.A.T.M.A.N. V data - * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs - * @neigh_list: list of unique single hop neighbors via this interface - * @neigh_list_lock: lock protecting neigh_list */ struct batadv_hard_iface { + /** @list: list node for batadv_hardif_list */ struct list_head list; + + /** @if_num: identificator of the interface */ s16 if_num; + + /** @if_status: status of the interface for batman-adv */ char if_status; + + /** + * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) + */ u8 num_bcasts; + + /** + * @wifi_flags: flags whether this is (directly or indirectly) a wifi + * interface + */ u32 wifi_flags; + + /** @net_dev: pointer to the net_device */ struct net_device *net_dev; + + /** @hardif_obj: kobject of the per interface sysfs "mesh" directory */ struct kobject *hardif_obj; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** + * @batman_adv_ptype: packet type describing packets that should be + * processed by batman-adv for this interface + */ struct packet_type batman_adv_ptype; + + /** + * @soft_iface: the batman-adv interface which uses this network + * interface + */ struct net_device *soft_iface; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; + + /** @bat_iv: per hard-interface B.A.T.M.A.N. IV data */ struct batadv_hard_iface_bat_iv bat_iv; + #ifdef CONFIG_BATMAN_ADV_BATMAN_V + /** @bat_v: per hard-interface B.A.T.M.A.N. V data */ struct batadv_hard_iface_bat_v bat_v; #endif + + /** + * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs + */ struct dentry *debug_dir; + + /** + * @neigh_list: list of unique single hop neighbors via this interface + */ struct hlist_head neigh_list; - /* neigh_list_lock protects: neigh_list */ + + /** @neigh_list_lock: lock protecting neigh_list */ spinlock_t neigh_list_lock; }; /** * struct batadv_orig_ifinfo - originator info per outgoing interface - * @list: list node for orig_node::ifinfo_list - * @if_outgoing: pointer to outgoing hard-interface - * @router: router that should be used to reach this originator - * @last_real_seqno: last and best known sequence number - * @last_ttl: ttl of last received packet - * @last_seqno_forwarded: seqno of the OGM which was forwarded last - * @batman_seqno_reset: time when the batman seqno window was reset - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_orig_ifinfo { + /** @list: list node for &batadv_orig_node.ifinfo_list */ struct hlist_node list; + + /** @if_outgoing: pointer to outgoing hard-interface */ struct batadv_hard_iface *if_outgoing; - struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ + + /** @router: router that should be used to reach this originator */ + struct batadv_neigh_node __rcu *router; + + /** @last_real_seqno: last and best known sequence number */ u32 last_real_seqno; + + /** @last_ttl: ttl of last received packet */ u8 last_ttl; + + /** @last_seqno_forwarded: seqno of the OGM which was forwarded last */ u32 last_seqno_forwarded; + + /** @batman_seqno_reset: time when the batman seqno window was reset */ unsigned long batman_seqno_reset; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_frag_table_entry - head in the fragment buffer table - * @fragment_list: head of list with fragments - * @lock: lock to protect the list of fragments - * @timestamp: time (jiffie) of last received fragment - * @seqno: sequence number of the fragments in the list - * @size: accumulated size of packets in list - * @total_size: expected size of the assembled packet */ struct batadv_frag_table_entry { + /** @fragment_list: head of list with fragments */ struct hlist_head fragment_list; - spinlock_t lock; /* protects fragment_list */ + + /** @lock: lock to protect the list of fragments */ + spinlock_t lock; + + /** @timestamp: time (jiffie) of last received fragment */ unsigned long timestamp; + + /** @seqno: sequence number of the fragments in the list */ u16 seqno; + + /** @size: accumulated size of packets in list */ u16 size; + + /** @total_size: expected size of the assembled packet */ u16 total_size; }; /** * struct batadv_frag_list_entry - entry in a list of fragments - * @list: list node information - * @skb: fragment - * @no: fragment number in the set */ struct batadv_frag_list_entry { + /** @list: list node information */ struct hlist_node list; + + /** @skb: fragment */ struct sk_buff *skb; + + /** @no: fragment number in the set */ u8 no; }; /** * struct batadv_vlan_tt - VLAN specific TT attributes - * @crc: CRC32 checksum of the entries belonging to this vlan - * @num_entries: number of TT entries for this VLAN */ struct batadv_vlan_tt { + /** @crc: CRC32 checksum of the entries belonging to this vlan */ u32 crc; + + /** @num_entries: number of TT entries for this VLAN */ atomic_t num_entries; }; /** * struct batadv_orig_node_vlan - VLAN specific data per orig_node - * @vid: the VLAN identifier - * @tt: VLAN specific TT attributes - * @list: list node for orig_node::vlan_list - * @refcount: number of context where this object is currently in use - * @rcu: struct used for freeing in a RCU-safe manner */ struct batadv_orig_node_vlan { + /** @vid: the VLAN identifier */ unsigned short vid; + + /** @tt: VLAN specific TT attributes */ struct batadv_vlan_tt tt; + + /** @list: list node for &batadv_orig_node.vlan_list */ struct hlist_node list; + + /** + * @refcount: number of context where this object is currently in use + */ struct kref refcount; + + /** @rcu: struct used for freeing in a RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members - * @bcast_own: set of bitfields (one per hard-interface) where each one counts - * the number of our OGMs this orig_node rebroadcasted "back" to us (relative - * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long. - * @bcast_own_sum: sum of bcast_own - * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum, - * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count */ struct batadv_orig_bat_iv { + /** + * @bcast_own: set of bitfields (one per hard-interface) where each one + * counts the number of our OGMs this orig_node rebroadcasted "back" to + * us (relative to last_real_seqno). Every bitfield is + * BATADV_TQ_LOCAL_WINDOW_SIZE bits long. + */ unsigned long *bcast_own; + + /** @bcast_own_sum: sum of bcast_own */ u8 *bcast_own_sum; - /* ogm_cnt_lock protects: bcast_own, bcast_own_sum, + + /** + * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum, * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count */ spinlock_t ogm_cnt_lock; @@ -275,130 +355,205 @@ struct batadv_orig_bat_iv { /** * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh - * @orig: originator ethernet address - * @ifinfo_list: list for routers per outgoing interface - * @last_bonding_candidate: pointer to last ifinfo of last used router - * @dat_addr: address of the orig node in the distributed hash - * @last_seen: time when last packet from this node was received - * @bcast_seqno_reset: time when the broadcast seqno window was reset - * @mcast_handler_lock: synchronizes mcast-capability and -flag changes - * @mcast_flags: multicast flags announced by the orig node - * @mcast_want_all_unsnoopables_node: a list node for the - * mcast.want_all_unsnoopables list - * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4 list - * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6 list - * @capabilities: announced capabilities of this originator - * @capa_initialized: bitfield to remember whether a capability was initialized - * @last_ttvn: last seen translation table version number - * @tt_buff: last tt changeset this node received from the orig node - * @tt_buff_len: length of the last tt changeset this node received from the - * orig node - * @tt_buff_lock: lock that protects tt_buff and tt_buff_len - * @tt_lock: prevents from updating the table while reading it. Table update is - * made up by two operations (data structure update and metdata -CRC/TTVN- - * recalculation) and they have to be executed atomically in order to avoid - * another thread to read the table/metadata between those. - * @bcast_bits: bitfield containing the info which payload broadcast originated - * from this orig node this host already has seen (relative to - * last_bcast_seqno) - * @last_bcast_seqno: last broadcast sequence number received by this host - * @neigh_list: list of potential next hop neighbor towards this orig node - * @neigh_list_lock: lock protecting neigh_list and router - * @hash_entry: hlist node for batadv_priv::orig_hash - * @bat_priv: pointer to soft_iface this orig node belongs to - * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner - * @in_coding_list: list of nodes this orig can hear - * @out_coding_list: list of nodes that can hear this orig - * @in_coding_list_lock: protects in_coding_list - * @out_coding_list_lock: protects out_coding_list - * @fragments: array with heads for fragment chains - * @vlan_list: a list of orig_node_vlan structs, one per VLAN served by the - * originator represented by this object - * @vlan_list_lock: lock protecting vlan_list - * @bat_iv: B.A.T.M.A.N. IV private structure */ struct batadv_orig_node { + /** @orig: originator ethernet address */ u8 orig[ETH_ALEN]; + + /** @ifinfo_list: list for routers per outgoing interface */ struct hlist_head ifinfo_list; + + /** + * @last_bonding_candidate: pointer to last ifinfo of last used router + */ struct batadv_orig_ifinfo *last_bonding_candidate; + #ifdef CONFIG_BATMAN_ADV_DAT + /** @dat_addr: address of the orig node in the distributed hash */ batadv_dat_addr_t dat_addr; #endif + + /** @last_seen: time when last packet from this node was received */ unsigned long last_seen; + + /** + * @bcast_seqno_reset: time when the broadcast seqno window was reset + */ unsigned long bcast_seqno_reset; + #ifdef CONFIG_BATMAN_ADV_MCAST - /* synchronizes mcast tvlv specific orig changes */ + /** + * @mcast_handler_lock: synchronizes mcast-capability and -flag changes + */ spinlock_t mcast_handler_lock; + + /** @mcast_flags: multicast flags announced by the orig node */ u8 mcast_flags; + + /** + * @mcast_want_all_unsnoopables_node: a list node for the + * mcast.want_all_unsnoopables list + */ struct hlist_node mcast_want_all_unsnoopables_node; + + /** + * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4 + * list + */ struct hlist_node mcast_want_all_ipv4_node; + /** + * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6 + * list + */ struct hlist_node mcast_want_all_ipv6_node; #endif + + /** @capabilities: announced capabilities of this originator */ unsigned long capabilities; + + /** + * @capa_initialized: bitfield to remember whether a capability was + * initialized + */ unsigned long capa_initialized; + + /** @last_ttvn: last seen translation table version number */ atomic_t last_ttvn; + + /** @tt_buff: last tt changeset this node received from the orig node */ unsigned char *tt_buff; + + /** + * @tt_buff_len: length of the last tt changeset this node received + * from the orig node + */ s16 tt_buff_len; - spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */ - /* prevents from changing the table while reading it */ + + /** @tt_buff_lock: lock that protects tt_buff and tt_buff_len */ + spinlock_t tt_buff_lock; + + /** + * @tt_lock: prevents from updating the table while reading it. Table + * update is made up by two operations (data structure update and + * metdata -CRC/TTVN-recalculation) and they have to be executed + * atomically in order to avoid another thread to read the + * table/metadata between those. + */ spinlock_t tt_lock; + + /** + * @bcast_bits: bitfield containing the info which payload broadcast + * originated from this orig node this host already has seen (relative + * to last_bcast_seqno) + */ DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); + + /** + * @last_bcast_seqno: last broadcast sequence number received by this + * host + */ u32 last_bcast_seqno; + + /** + * @neigh_list: list of potential next hop neighbor towards this orig + * node + */ struct hlist_head neigh_list; - /* neigh_list_lock protects: neigh_list, ifinfo_list, - * last_bonding_candidate and router + + /** + * @neigh_list_lock: lock protecting neigh_list, ifinfo_list, + * last_bonding_candidate and router */ spinlock_t neigh_list_lock; + + /** @hash_entry: hlist node for &batadv_priv.orig_hash */ struct hlist_node hash_entry; + + /** @bat_priv: pointer to soft_iface this orig node belongs to */ struct batadv_priv *bat_priv; - /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ + + /** @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno */ spinlock_t bcast_seqno_lock; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; + #ifdef CONFIG_BATMAN_ADV_NC + /** @in_coding_list: list of nodes this orig can hear */ struct list_head in_coding_list; + + /** @out_coding_list: list of nodes that can hear this orig */ struct list_head out_coding_list; - spinlock_t in_coding_list_lock; /* Protects in_coding_list */ - spinlock_t out_coding_list_lock; /* Protects out_coding_list */ + + /** @in_coding_list_lock: protects in_coding_list */ + spinlock_t in_coding_list_lock; + + /** @out_coding_list_lock: protects out_coding_list */ + spinlock_t out_coding_list_lock; #endif + + /** @fragments: array with heads for fragment chains */ struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT]; + + /** + * @vlan_list: a list of orig_node_vlan structs, one per VLAN served by + * the originator represented by this object + */ struct hlist_head vlan_list; - spinlock_t vlan_list_lock; /* protects vlan_list */ + + /** @vlan_list_lock: lock protecting vlan_list */ + spinlock_t vlan_list_lock; + + /** @bat_iv: B.A.T.M.A.N. IV private structure */ struct batadv_orig_bat_iv bat_iv; }; /** * enum batadv_orig_capabilities - orig node capabilities - * @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table enabled - * @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled - * @BATADV_ORIG_CAPA_HAS_TT: orig node has tt capability - * @BATADV_ORIG_CAPA_HAS_MCAST: orig node has some multicast capability - * (= orig node announces a tvlv of type BATADV_TVLV_MCAST) */ enum batadv_orig_capabilities { + /** + * @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table + * enabled + */ BATADV_ORIG_CAPA_HAS_DAT, + + /** @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled */ BATADV_ORIG_CAPA_HAS_NC, + + /** @BATADV_ORIG_CAPA_HAS_TT: orig node has tt capability */ BATADV_ORIG_CAPA_HAS_TT, + + /** + * @BATADV_ORIG_CAPA_HAS_MCAST: orig node has some multicast capability + * (= orig node announces a tvlv of type BATADV_TVLV_MCAST) + */ BATADV_ORIG_CAPA_HAS_MCAST, }; /** * struct batadv_gw_node - structure for orig nodes announcing gw capabilities - * @list: list node for batadv_priv_gw::list - * @orig_node: pointer to corresponding orig node - * @bandwidth_down: advertised uplink download bandwidth - * @bandwidth_up: advertised uplink upload bandwidth - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_gw_node { + /** @list: list node for &batadv_priv_gw.list */ struct hlist_node list; + + /** @orig_node: pointer to corresponding orig node */ struct batadv_orig_node *orig_node; + + /** @bandwidth_down: advertised uplink download bandwidth */ u32 bandwidth_down; + + /** @bandwidth_up: advertised uplink upload bandwidth */ u32 bandwidth_up; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; }; @@ -407,118 +562,161 @@ DECLARE_EWMA(throughput, 10, 8) /** * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor * information - * @throughput: ewma link throughput towards this neighbor - * @elp_interval: time interval between two ELP transmissions - * @elp_latest_seqno: latest and best known ELP sequence number - * @last_unicast_tx: when the last unicast packet has been sent to this neighbor - * @metric_work: work queue callback item for metric update */ struct batadv_hardif_neigh_node_bat_v { + /** @throughput: ewma link throughput towards this neighbor */ struct ewma_throughput throughput; + + /** @elp_interval: time interval between two ELP transmissions */ u32 elp_interval; + + /** @elp_latest_seqno: latest and best known ELP sequence number */ u32 elp_latest_seqno; + + /** + * @last_unicast_tx: when the last unicast packet has been sent to this + * neighbor + */ unsigned long last_unicast_tx; + + /** @metric_work: work queue callback item for metric update */ struct work_struct metric_work; }; /** * struct batadv_hardif_neigh_node - unique neighbor per hard-interface - * @list: list node for batadv_hard_iface::neigh_list - * @addr: the MAC address of the neighboring interface - * @orig: the address of the originator this neighbor node belongs to - * @if_incoming: pointer to incoming hard-interface - * @last_seen: when last packet via this neighbor was received - * @bat_v: B.A.T.M.A.N. V private data - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in a RCU-safe manner */ struct batadv_hardif_neigh_node { + /** @list: list node for &batadv_hard_iface.neigh_list */ struct hlist_node list; + + /** @addr: the MAC address of the neighboring interface */ u8 addr[ETH_ALEN]; + + /** + * @orig: the address of the originator this neighbor node belongs to + */ u8 orig[ETH_ALEN]; + + /** @if_incoming: pointer to incoming hard-interface */ struct batadv_hard_iface *if_incoming; + + /** @last_seen: when last packet via this neighbor was received */ unsigned long last_seen; + #ifdef CONFIG_BATMAN_ADV_BATMAN_V + /** @bat_v: B.A.T.M.A.N. V private data */ struct batadv_hardif_neigh_node_bat_v bat_v; #endif + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in a RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_neigh_node - structure for single hops neighbors - * @list: list node for batadv_orig_node::neigh_list - * @orig_node: pointer to corresponding orig_node - * @addr: the MAC address of the neighboring interface - * @ifinfo_list: list for routing metrics per outgoing interface - * @ifinfo_lock: lock protecting private ifinfo members and list - * @if_incoming: pointer to incoming hard-interface - * @last_seen: when last packet via this neighbor was received - * @hardif_neigh: hardif_neigh of this neighbor - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_neigh_node { + /** @list: list node for &batadv_orig_node.neigh_list */ struct hlist_node list; + + /** @orig_node: pointer to corresponding orig_node */ struct batadv_orig_node *orig_node; + + /** @addr: the MAC address of the neighboring interface */ u8 addr[ETH_ALEN]; + + /** @ifinfo_list: list for routing metrics per outgoing interface */ struct hlist_head ifinfo_list; - spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ + + /** @ifinfo_lock: lock protecting ifinfo_list and its members */ + spinlock_t ifinfo_lock; + + /** @if_incoming: pointer to incoming hard-interface */ struct batadv_hard_iface *if_incoming; + + /** @last_seen: when last packet via this neighbor was received */ unsigned long last_seen; + + /** @hardif_neigh: hardif_neigh of this neighbor */ struct batadv_hardif_neigh_node *hardif_neigh; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing * interface for B.A.T.M.A.N. IV - * @tq_recv: ring buffer of received TQ values from this neigh node - * @tq_index: ring buffer index - * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) - * @real_bits: bitfield containing the number of OGMs received from this neigh - * node (relative to orig_node->last_real_seqno) - * @real_packet_count: counted result of real_bits */ struct batadv_neigh_ifinfo_bat_iv { + /** @tq_recv: ring buffer of received TQ values from this neigh node */ u8 tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; + + /** @tq_index: ring buffer index */ u8 tq_index; + + /** + * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) + */ u8 tq_avg; + + /** + * @real_bits: bitfield containing the number of OGMs received from this + * neigh node (relative to orig_node->last_real_seqno) + */ DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); + + /** @real_packet_count: counted result of real_bits */ u8 real_packet_count; }; /** * struct batadv_neigh_ifinfo_bat_v - neighbor information per outgoing * interface for B.A.T.M.A.N. V - * @throughput: last throughput metric received from originator via this neigh - * @last_seqno: last sequence number known for this neighbor */ struct batadv_neigh_ifinfo_bat_v { + /** + * @throughput: last throughput metric received from originator via this + * neigh + */ u32 throughput; + + /** @last_seqno: last sequence number known for this neighbor */ u32 last_seqno; }; /** * struct batadv_neigh_ifinfo - neighbor information per outgoing interface - * @list: list node for batadv_neigh_node::ifinfo_list - * @if_outgoing: pointer to outgoing hard-interface - * @bat_iv: B.A.T.M.A.N. IV private structure - * @bat_v: B.A.T.M.A.N. V private data - * @last_ttl: last received ttl from this neigh node - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in a RCU-safe manner */ struct batadv_neigh_ifinfo { + /** @list: list node for &batadv_neigh_node.ifinfo_list */ struct hlist_node list; + + /** @if_outgoing: pointer to outgoing hard-interface */ struct batadv_hard_iface *if_outgoing; + + /** @bat_iv: B.A.T.M.A.N. IV private structure */ struct batadv_neigh_ifinfo_bat_iv bat_iv; + #ifdef CONFIG_BATMAN_ADV_BATMAN_V + /** @bat_v: B.A.T.M.A.N. V private data */ struct batadv_neigh_ifinfo_bat_v bat_v; #endif + + /** @last_ttl: last received ttl from this neigh node */ u8 last_ttl; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in a RCU-safe manner */ struct rcu_head rcu; }; @@ -526,148 +724,278 @@ struct batadv_neigh_ifinfo { /** * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression - * @orig: mac address of orig node orginating the broadcast - * @crc: crc32 checksum of broadcast payload - * @entrytime: time when the broadcast packet was received */ struct batadv_bcast_duplist_entry { + /** @orig: mac address of orig node orginating the broadcast */ u8 orig[ETH_ALEN]; + + /** @crc: crc32 checksum of broadcast payload */ __be32 crc; + + /** @entrytime: time when the broadcast packet was received */ unsigned long entrytime; }; #endif /** * enum batadv_counters - indices for traffic counters - * @BATADV_CNT_TX: transmitted payload traffic packet counter - * @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter - * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet counter - * @BATADV_CNT_RX: received payload traffic packet counter - * @BATADV_CNT_RX_BYTES: received payload traffic bytes counter - * @BATADV_CNT_FORWARD: forwarded payload traffic packet counter - * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter - * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet counter - * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter - * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter - * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter - * @BATADV_CNT_FRAG_TX: transmitted fragment traffic packet counter - * @BATADV_CNT_FRAG_TX_BYTES: transmitted fragment traffic bytes counter - * @BATADV_CNT_FRAG_RX: received fragment traffic packet counter - * @BATADV_CNT_FRAG_RX_BYTES: received fragment traffic bytes counter - * @BATADV_CNT_FRAG_FWD: forwarded fragment traffic packet counter - * @BATADV_CNT_FRAG_FWD_BYTES: forwarded fragment traffic bytes counter - * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter - * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter - * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter - * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter - * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet counter - * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter - * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter - * @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter - * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter - * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter - * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet - * counter - * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter - * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter - * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter - * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter - * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding - * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter - * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter - * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet - * counter - * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc - * mode. - * @BATADV_CNT_NUM: number of traffic counters */ enum batadv_counters { + /** @BATADV_CNT_TX: transmitted payload traffic packet counter */ BATADV_CNT_TX, + + /** @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter */ BATADV_CNT_TX_BYTES, + + /** + * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet + * counter + */ BATADV_CNT_TX_DROPPED, + + /** @BATADV_CNT_RX: received payload traffic packet counter */ BATADV_CNT_RX, + + /** @BATADV_CNT_RX_BYTES: received payload traffic bytes counter */ BATADV_CNT_RX_BYTES, + + /** @BATADV_CNT_FORWARD: forwarded payload traffic packet counter */ BATADV_CNT_FORWARD, + + /** + * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter + */ BATADV_CNT_FORWARD_BYTES, + + /** + * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet + * counter + */ BATADV_CNT_MGMT_TX, + + /** + * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes + * counter + */ BATADV_CNT_MGMT_TX_BYTES, + + /** + * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter + */ BATADV_CNT_MGMT_RX, + + /** + * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes + * counter + */ BATADV_CNT_MGMT_RX_BYTES, + + /** @BATADV_CNT_FRAG_TX: transmitted fragment traffic packet counter */ BATADV_CNT_FRAG_TX, + + /** + * @BATADV_CNT_FRAG_TX_BYTES: transmitted fragment traffic bytes counter + */ BATADV_CNT_FRAG_TX_BYTES, + + /** @BATADV_CNT_FRAG_RX: received fragment traffic packet counter */ BATADV_CNT_FRAG_RX, + + /** + * @BATADV_CNT_FRAG_RX_BYTES: received fragment traffic bytes counter + */ BATADV_CNT_FRAG_RX_BYTES, + + /** @BATADV_CNT_FRAG_FWD: forwarded fragment traffic packet counter */ BATADV_CNT_FRAG_FWD, + + /** + * @BATADV_CNT_FRAG_FWD_BYTES: forwarded fragment traffic bytes counter + */ BATADV_CNT_FRAG_FWD_BYTES, + + /** + * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter + */ BATADV_CNT_TT_REQUEST_TX, + + /** @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter */ BATADV_CNT_TT_REQUEST_RX, + + /** + * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet + * counter + */ BATADV_CNT_TT_RESPONSE_TX, + + /** + * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter + */ BATADV_CNT_TT_RESPONSE_RX, + + /** + * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet + * counter + */ BATADV_CNT_TT_ROAM_ADV_TX, + + /** + * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter + */ BATADV_CNT_TT_ROAM_ADV_RX, + #ifdef CONFIG_BATMAN_ADV_DAT + /** + * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter + */ BATADV_CNT_DAT_GET_TX, + + /** @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter */ BATADV_CNT_DAT_GET_RX, + + /** + * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter + */ BATADV_CNT_DAT_PUT_TX, + + /** @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter */ BATADV_CNT_DAT_PUT_RX, + + /** + * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic + * packet counter + */ BATADV_CNT_DAT_CACHED_REPLY_TX, #endif + #ifdef CONFIG_BATMAN_ADV_NC + /** + * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter + */ BATADV_CNT_NC_CODE, + + /** + * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes + * counter + */ BATADV_CNT_NC_CODE_BYTES, + + /** + * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet + * counter + */ BATADV_CNT_NC_RECODE, + + /** + * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes + * counter + */ BATADV_CNT_NC_RECODE_BYTES, + + /** + * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc + * decoding + */ BATADV_CNT_NC_BUFFER, + + /** + * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter + */ BATADV_CNT_NC_DECODE, + + /** + * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes + * counter + */ BATADV_CNT_NC_DECODE_BYTES, + + /** + * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic + * packet counter + */ BATADV_CNT_NC_DECODE_FAILED, + + /** + * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in + * promisc mode. + */ BATADV_CNT_NC_SNIFFED, #endif + + /** @BATADV_CNT_NUM: number of traffic counters */ BATADV_CNT_NUM, }; /** * struct batadv_priv_tt - per mesh interface translation table data - * @vn: translation table version number - * @ogm_append_cnt: counter of number of OGMs containing the local tt diff - * @local_changes: changes registered in an originator interval - * @changes_list: tracks tt local changes within an originator interval - * @local_hash: local translation table hash table - * @global_hash: global translation table hash table - * @req_list: list of pending & unanswered tt_requests - * @roam_list: list of the last roaming events of each client limiting the - * number of roaming events to avoid route flapping - * @changes_list_lock: lock protecting changes_list - * @req_list_lock: lock protecting req_list - * @roam_list_lock: lock protecting roam_list - * @last_changeset: last tt changeset this host has generated - * @last_changeset_len: length of last tt changeset this host has generated - * @last_changeset_lock: lock protecting last_changeset & last_changeset_len - * @commit_lock: prevents from executing a local TT commit while reading the - * local table. The local TT commit is made up by two operations (data - * structure update and metdata -CRC/TTVN- recalculation) and they have to be - * executed atomically in order to avoid another thread to read the - * table/metadata between those. - * @work: work queue callback item for translation table purging */ struct batadv_priv_tt { + /** @vn: translation table version number */ atomic_t vn; + + /** + * @ogm_append_cnt: counter of number of OGMs containing the local tt + * diff + */ atomic_t ogm_append_cnt; + + /** @local_changes: changes registered in an originator interval */ atomic_t local_changes; + + /** + * @changes_list: tracks tt local changes within an originator interval + */ struct list_head changes_list; + + /** @local_hash: local translation table hash table */ struct batadv_hashtable *local_hash; + + /** @global_hash: global translation table hash table */ struct batadv_hashtable *global_hash; + + /** @req_list: list of pending & unanswered tt_requests */ struct hlist_head req_list; + + /** + * @roam_list: list of the last roaming events of each client limiting + * the number of roaming events to avoid route flapping + */ struct list_head roam_list; - spinlock_t changes_list_lock; /* protects changes */ - spinlock_t req_list_lock; /* protects req_list */ - spinlock_t roam_list_lock; /* protects roam_list */ + + /** @changes_list_lock: lock protecting changes_list */ + spinlock_t changes_list_lock; + + /** @req_list_lock: lock protecting req_list */ + spinlock_t req_list_lock; + + /** @roam_list_lock: lock protecting roam_list */ + spinlock_t roam_list_lock; + + /** @last_changeset: last tt changeset this host has generated */ unsigned char *last_changeset; + + /** + * @last_changeset_len: length of last tt changeset this host has + * generated + */ s16 last_changeset_len; - /* protects last_changeset & last_changeset_len */ + + /** + * @last_changeset_lock: lock protecting last_changeset & + * last_changeset_len + */ spinlock_t last_changeset_lock; - /* prevents from executing a commit while reading the table */ + + /** + * @commit_lock: prevents from executing a local TT commit while reading + * the local table. The local TT commit is made up by two operations + * (data structure update and metdata -CRC/TTVN- recalculation) and + * they have to be executed atomically in order to avoid another thread + * to read the table/metadata between those. + */ spinlock_t commit_lock; + + /** @work: work queue callback item for translation table purging */ struct delayed_work work; }; @@ -675,31 +1003,57 @@ struct batadv_priv_tt { /** * struct batadv_priv_bla - per mesh interface bridge loope avoidance data - * @num_requests: number of bla requests in flight - * @claim_hash: hash table containing mesh nodes this host has claimed - * @backbone_hash: hash table containing all detected backbone gateways - * @loopdetect_addr: MAC address used for own loopdetection frames - * @loopdetect_lasttime: time when the loopdetection frames were sent - * @loopdetect_next: how many periods to wait for the next loopdetect process - * @bcast_duplist: recently received broadcast packets array (for broadcast - * duplicate suppression) - * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist - * @bcast_duplist_lock: lock protecting bcast_duplist & bcast_duplist_curr - * @claim_dest: local claim data (e.g. claim group) - * @work: work queue callback item for cleanups & bla announcements */ struct batadv_priv_bla { + /** @num_requests: number of bla requests in flight */ atomic_t num_requests; + + /** + * @claim_hash: hash table containing mesh nodes this host has claimed + */ struct batadv_hashtable *claim_hash; + + /** + * @backbone_hash: hash table containing all detected backbone gateways + */ struct batadv_hashtable *backbone_hash; + + /** @loopdetect_addr: MAC address used for own loopdetection frames */ u8 loopdetect_addr[ETH_ALEN]; + + /** + * @loopdetect_lasttime: time when the loopdetection frames were sent + */ unsigned long loopdetect_lasttime; + + /** + * @loopdetect_next: how many periods to wait for the next loopdetect + * process + */ atomic_t loopdetect_next; + + /** + * @bcast_duplist: recently received broadcast packets array (for + * broadcast duplicate suppression) + */ struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; + + /** + * @bcast_duplist_curr: index of last broadcast packet added to + * bcast_duplist + */ int bcast_duplist_curr; - /* protects bcast_duplist & bcast_duplist_curr */ + + /** + * @bcast_duplist_lock: lock protecting bcast_duplist & + * bcast_duplist_curr + */ spinlock_t bcast_duplist_lock; + + /** @claim_dest: local claim data (e.g. claim group) */ struct batadv_bla_claim_dst claim_dest; + + /** @work: work queue callback item for cleanups & bla announcements */ struct delayed_work work; }; #endif @@ -708,68 +1062,94 @@ struct batadv_priv_bla { /** * struct batadv_priv_debug_log - debug logging data - * @log_buff: buffer holding the logs (ring bufer) - * @log_start: index of next character to read - * @log_end: index of next character to write - * @lock: lock protecting log_buff, log_start & log_end - * @queue_wait: log reader's wait queue */ struct batadv_priv_debug_log { + /** @log_buff: buffer holding the logs (ring bufer) */ char log_buff[BATADV_LOG_BUF_LEN]; + + /** @log_start: index of next character to read */ unsigned long log_start; + + /** @log_end: index of next character to write */ unsigned long log_end; - spinlock_t lock; /* protects log_buff, log_start and log_end */ + + /** @lock: lock protecting log_buff, log_start & log_end */ + spinlock_t lock; + + /** @queue_wait: log reader's wait queue */ wait_queue_head_t queue_wait; }; #endif /** * struct batadv_priv_gw - per mesh interface gateway data - * @gateway_list: list of available gateway nodes - * @list_lock: lock protecting gateway_list & curr_gw - * @curr_gw: pointer to currently selected gateway node - * @mode: gateway operation: off, client or server (see batadv_gw_modes) - * @sel_class: gateway selection class (applies if gw_mode client) - * @bandwidth_down: advertised uplink download bandwidth (if gw_mode server) - * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server) - * @reselect: bool indicating a gateway re-selection is in progress */ struct batadv_priv_gw { + /** @gateway_list: list of available gateway nodes */ struct hlist_head gateway_list; - spinlock_t list_lock; /* protects gateway_list & curr_gw */ - struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ + + /** @list_lock: lock protecting gateway_list & curr_gw */ + spinlock_t list_lock; + + /** @curr_gw: pointer to currently selected gateway node */ + struct batadv_gw_node __rcu *curr_gw; + + /** + * @mode: gateway operation: off, client or server (see batadv_gw_modes) + */ atomic_t mode; + + /** @sel_class: gateway selection class (applies if gw_mode client) */ atomic_t sel_class; + + /** + * @bandwidth_down: advertised uplink download bandwidth (if gw_mode + * server) + */ atomic_t bandwidth_down; + + /** + * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server) + */ atomic_t bandwidth_up; + + /** @reselect: bool indicating a gateway re-selection is in progress */ atomic_t reselect; }; /** * struct batadv_priv_tvlv - per mesh interface tvlv data - * @container_list: list of registered tvlv containers to be sent with each OGM - * @handler_list: list of the various tvlv content handlers - * @container_list_lock: protects tvlv container list access - * @handler_list_lock: protects handler list access */ struct batadv_priv_tvlv { + /** + * @container_list: list of registered tvlv containers to be sent with + * each OGM + */ struct hlist_head container_list; + + /** @handler_list: list of the various tvlv content handlers */ struct hlist_head handler_list; - spinlock_t container_list_lock; /* protects container_list */ - spinlock_t handler_list_lock; /* protects handler_list */ + + /** @container_list_lock: protects tvlv container list access */ + spinlock_t container_list_lock; + + /** @handler_list_lock: protects handler list access */ + spinlock_t handler_list_lock; }; #ifdef CONFIG_BATMAN_ADV_DAT /** * struct batadv_priv_dat - per mesh interface DAT private data - * @addr: node DAT address - * @hash: hashtable representing the local ARP cache - * @work: work queue callback item for cache purging */ struct batadv_priv_dat { + /** @addr: node DAT address */ batadv_dat_addr_t addr; + + /** @hash: hashtable representing the local ARP cache */ struct batadv_hashtable *hash; + + /** @work: work queue callback item for cache purging */ struct delayed_work work; }; #endif @@ -777,375 +1157,582 @@ struct batadv_priv_dat { #ifdef CONFIG_BATMAN_ADV_MCAST /** * struct batadv_mcast_querier_state - IGMP/MLD querier state when bridged - * @exists: whether a querier exists in the mesh - * @shadowing: if a querier exists, whether it is potentially shadowing - * multicast listeners (i.e. querier is behind our own bridge segment) */ struct batadv_mcast_querier_state { + /** @exists: whether a querier exists in the mesh */ bool exists; + + /** + * @shadowing: if a querier exists, whether it is potentially shadowing + * multicast listeners (i.e. querier is behind our own bridge segment) + */ bool shadowing; }; /** * struct batadv_priv_mcast - per mesh interface mcast data - * @mla_list: list of multicast addresses we are currently announcing via TT - * @want_all_unsnoopables_list: a list of orig_nodes wanting all unsnoopable - * multicast traffic - * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast traffic - * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast traffic - * @querier_ipv4: the current state of an IGMP querier in the mesh - * @querier_ipv6: the current state of an MLD querier in the mesh - * @flags: the flags we have last sent in our mcast tvlv - * @enabled: whether the multicast tvlv is currently enabled - * @bridged: whether the soft interface has a bridge on top - * @num_disabled: number of nodes that have no mcast tvlv - * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic - * @num_want_all_ipv4: counter for items in want_all_ipv4_list - * @num_want_all_ipv6: counter for items in want_all_ipv6_list - * @want_lists_lock: lock for protecting modifications to mcast want lists - * (traversals are rcu-locked) - * @work: work queue callback item for multicast TT and TVLV updates */ struct batadv_priv_mcast { + /** + * @mla_list: list of multicast addresses we are currently announcing + * via TT + */ struct hlist_head mla_list; /* see __batadv_mcast_mla_update() */ + + /** + * @want_all_unsnoopables_list: a list of orig_nodes wanting all + * unsnoopable multicast traffic + */ struct hlist_head want_all_unsnoopables_list; + + /** + * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast + * traffic + */ struct hlist_head want_all_ipv4_list; + + /** + * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast + * traffic + */ struct hlist_head want_all_ipv6_list; + + /** @querier_ipv4: the current state of an IGMP querier in the mesh */ struct batadv_mcast_querier_state querier_ipv4; + + /** @querier_ipv6: the current state of an MLD querier in the mesh */ struct batadv_mcast_querier_state querier_ipv6; + + /** @flags: the flags we have last sent in our mcast tvlv */ u8 flags; + + /** @enabled: whether the multicast tvlv is currently enabled */ bool enabled; + + /** @bridged: whether the soft interface has a bridge on top */ bool bridged; + + /** @num_disabled: number of nodes that have no mcast tvlv */ atomic_t num_disabled; + + /** + * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP + * traffic + */ atomic_t num_want_all_unsnoopables; + + /** @num_want_all_ipv4: counter for items in want_all_ipv4_list */ atomic_t num_want_all_ipv4; + + /** @num_want_all_ipv6: counter for items in want_all_ipv6_list */ atomic_t num_want_all_ipv6; - /* protects want_all_{unsnoopables,ipv4,ipv6}_list */ + + /** + * @want_lists_lock: lock for protecting modifications to mcasts + * want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked) + */ spinlock_t want_lists_lock; + + /** @work: work queue callback item for multicast TT and TVLV updates */ struct delayed_work work; }; #endif /** * struct batadv_priv_nc - per mesh interface network coding private data - * @work: work queue callback item for cleanup - * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs - * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq - * @max_fwd_delay: maximum packet forward delay to allow coding of packets - * @max_buffer_time: buffer time for sniffed packets used to decoding - * @timestamp_fwd_flush: timestamp of last forward packet queue flush - * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge - * @coding_hash: Hash table used to buffer skbs while waiting for another - * incoming skb to code it with. Skbs are added to the buffer just before being - * forwarded in routing.c - * @decoding_hash: Hash table used to buffer skbs that might be needed to decode - * a received coded skb. The buffer is used for 1) skbs arriving on the - * soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs - * forwarded by batman-adv. */ struct batadv_priv_nc { + /** @work: work queue callback item for cleanup */ struct delayed_work work; + + /** + * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs + */ struct dentry *debug_dir; + + /** + * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq + */ u8 min_tq; + + /** + * @max_fwd_delay: maximum packet forward delay to allow coding of + * packets + */ u32 max_fwd_delay; + + /** + * @max_buffer_time: buffer time for sniffed packets used to decoding + */ u32 max_buffer_time; + + /** + * @timestamp_fwd_flush: timestamp of last forward packet queue flush + */ unsigned long timestamp_fwd_flush; + + /** + * @timestamp_sniffed_purge: timestamp of last sniffed packet queue + * purge + */ unsigned long timestamp_sniffed_purge; + + /** + * @coding_hash: Hash table used to buffer skbs while waiting for + * another incoming skb to code it with. Skbs are added to the buffer + * just before being forwarded in routing.c + */ struct batadv_hashtable *coding_hash; + + /** + * @decoding_hash: Hash table used to buffer skbs that might be needed + * to decode a received coded skb. The buffer is used for 1) skbs + * arriving on the soft-interface; 2) skbs overheard on the + * hard-interface; and 3) skbs forwarded by batman-adv. + */ struct batadv_hashtable *decoding_hash; }; /** * struct batadv_tp_unacked - unacked packet meta-information - * @seqno: seqno of the unacked packet - * @len: length of the packet - * @list: list node for batadv_tp_vars::unacked_list * * This struct is supposed to represent a buffer unacked packet. However, since * the purpose of the TP meter is to count the traffic only, there is no need to * store the entire sk_buff, the starting offset and the length are enough */ struct batadv_tp_unacked { + /** @seqno: seqno of the unacked packet */ u32 seqno; + + /** @len: length of the packet */ u16 len; + + /** @list: list node for &batadv_tp_vars.unacked_list */ struct list_head list; }; /** * enum batadv_tp_meter_role - Modus in tp meter session - * @BATADV_TP_RECEIVER: Initialized as receiver - * @BATADV_TP_SENDER: Initialized as sender */ enum batadv_tp_meter_role { + /** @BATADV_TP_RECEIVER: Initialized as receiver */ BATADV_TP_RECEIVER, + + /** @BATADV_TP_SENDER: Initialized as sender */ BATADV_TP_SENDER }; /** * struct batadv_tp_vars - tp meter private variables per session - * @list: list node for bat_priv::tp_list - * @timer: timer for ack (receiver) and retry (sender) - * @bat_priv: pointer to the mesh object - * @start_time: start time in jiffies - * @other_end: mac address of remote - * @role: receiver/sender modi - * @sending: sending binary semaphore: 1 if sending, 0 is not - * @reason: reason for a stopped session - * @finish_work: work item for the finishing procedure - * @test_length: test length in milliseconds - * @session: TP session identifier - * @icmp_uid: local ICMP "socket" index - * @dec_cwnd: decimal part of the cwnd used during linear growth - * @cwnd: current size of the congestion window - * @cwnd_lock: lock do protect @cwnd & @dec_cwnd - * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the - * connection switches to the Congestion Avoidance state - * @last_acked: last acked byte - * @last_sent: last sent byte, not yet acked - * @tot_sent: amount of data sent/ACKed so far - * @dup_acks: duplicate ACKs counter - * @fast_recovery: true if in Fast Recovery mode - * @recover: last sent seqno when entering Fast Recovery - * @rto: sender timeout - * @srtt: smoothed RTT scaled by 2^3 - * @rttvar: RTT variation scaled by 2^2 - * @more_bytes: waiting queue anchor when waiting for more ack/retry timeout - * @prerandom_offset: offset inside the prerandom buffer - * @prerandom_lock: spinlock protecting access to prerandom_offset - * @last_recv: last in-order received packet - * @unacked_list: list of unacked packets (meta-info only) - * @unacked_lock: protect unacked_list - * @last_recv_time: time time (jiffies) a msg was received - * @refcount: number of context where the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_tp_vars { + /** @list: list node for &bat_priv.tp_list */ struct hlist_node list; + + /** @timer: timer for ack (receiver) and retry (sender) */ struct timer_list timer; + + /** @bat_priv: pointer to the mesh object */ struct batadv_priv *bat_priv; + + /** @start_time: start time in jiffies */ unsigned long start_time; + + /** @other_end: mac address of remote */ u8 other_end[ETH_ALEN]; + + /** @role: receiver/sender modi */ enum batadv_tp_meter_role role; + + /** @sending: sending binary semaphore: 1 if sending, 0 is not */ atomic_t sending; + + /** @reason: reason for a stopped session */ enum batadv_tp_meter_reason reason; + + /** @finish_work: work item for the finishing procedure */ struct delayed_work finish_work; + + /** @test_length: test length in milliseconds */ u32 test_length; + + /** @session: TP session identifier */ u8 session[2]; + + /** @icmp_uid: local ICMP "socket" index */ u8 icmp_uid; /* sender variables */ + + /** @dec_cwnd: decimal part of the cwnd used during linear growth */ u16 dec_cwnd; + + /** @cwnd: current size of the congestion window */ u32 cwnd; - spinlock_t cwnd_lock; /* Protects cwnd & dec_cwnd */ + + /** @cwnd_lock: lock do protect @cwnd & @dec_cwnd */ + spinlock_t cwnd_lock; + + /** + * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the + * connection switches to the Congestion Avoidance state + */ u32 ss_threshold; + + /** @last_acked: last acked byte */ atomic_t last_acked; + + /** @last_sent: last sent byte, not yet acked */ u32 last_sent; + + /** @tot_sent: amount of data sent/ACKed so far */ atomic64_t tot_sent; + + /** @dup_acks: duplicate ACKs counter */ atomic_t dup_acks; + + /** @fast_recovery: true if in Fast Recovery mode */ bool fast_recovery; + + /** @recover: last sent seqno when entering Fast Recovery */ u32 recover; + + /** @rto: sender timeout */ u32 rto; + + /** @srtt: smoothed RTT scaled by 2^3 */ u32 srtt; + + /** @rttvar: RTT variation scaled by 2^2 */ u32 rttvar; + + /** + * @more_bytes: waiting queue anchor when waiting for more ack/retry + * timeout + */ wait_queue_head_t more_bytes; + + /** @prerandom_offset: offset inside the prerandom buffer */ u32 prerandom_offset; - spinlock_t prerandom_lock; /* Protects prerandom_offset */ + + /** @prerandom_lock: spinlock protecting access to prerandom_offset */ + spinlock_t prerandom_lock; /* receiver variables */ + + /** @last_recv: last in-order received packet */ u32 last_recv; + + /** @unacked_list: list of unacked packets (meta-info only) */ struct list_head unacked_list; - spinlock_t unacked_lock; /* Protects unacked_list */ + + /** @unacked_lock: protect unacked_list */ + spinlock_t unacked_lock; + + /** @last_recv_time: time time (jiffies) a msg was received */ unsigned long last_recv_time; + + /** @refcount: number of context where the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_softif_vlan - per VLAN attributes set - * @bat_priv: pointer to the mesh object - * @vid: VLAN identifier - * @kobj: kobject for sysfs vlan subdirectory - * @ap_isolation: AP isolation state - * @tt: TT private attributes (VLAN specific) - * @list: list node for bat_priv::softif_vlan_list - * @refcount: number of context where this object is currently in use - * @rcu: struct used for freeing in a RCU-safe manner */ struct batadv_softif_vlan { + /** @bat_priv: pointer to the mesh object */ struct batadv_priv *bat_priv; + + /** @vid: VLAN identifier */ unsigned short vid; + + /** @kobj: kobject for sysfs vlan subdirectory */ struct kobject *kobj; + + /** @ap_isolation: AP isolation state */ atomic_t ap_isolation; /* boolean */ + + /** @tt: TT private attributes (VLAN specific) */ struct batadv_vlan_tt tt; + + /** @list: list node for &bat_priv.softif_vlan_list */ struct hlist_node list; + + /** + * @refcount: number of context where this object is currently in use + */ struct kref refcount; + + /** @rcu: struct used for freeing in a RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_priv_bat_v - B.A.T.M.A.N. V per soft-interface private data - * @ogm_buff: buffer holding the OGM packet - * @ogm_buff_len: length of the OGM packet buffer - * @ogm_seqno: OGM sequence number - used to identify each OGM - * @ogm_wq: workqueue used to schedule OGM transmissions */ struct batadv_priv_bat_v { + /** @ogm_buff: buffer holding the OGM packet */ unsigned char *ogm_buff; + + /** @ogm_buff_len: length of the OGM packet buffer */ int ogm_buff_len; + + /** @ogm_seqno: OGM sequence number - used to identify each OGM */ atomic_t ogm_seqno; + + /** @ogm_wq: workqueue used to schedule OGM transmissions */ struct delayed_work ogm_wq; }; /** * struct batadv_priv - per mesh interface data - * @mesh_state: current status of the mesh (inactive/active/deactivating) - * @soft_iface: net device which holds this struct as private data - * @bat_counters: mesh internal traffic statistic counters (see batadv_counters) - * @aggregated_ogms: bool indicating whether OGM aggregation is enabled - * @bonding: bool indicating whether traffic bonding is enabled - * @fragmentation: bool indicating whether traffic fragmentation is enabled - * @packet_size_max: max packet size that can be transmitted via - * multiple fragmented skbs or a single frame if fragmentation is disabled - * @frag_seqno: incremental counter to identify chains of egress fragments - * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is - * enabled - * @distributed_arp_table: bool indicating whether distributed ARP table is - * enabled - * @multicast_mode: Enable or disable multicast optimizations on this node's - * sender/originating side - * @orig_interval: OGM broadcast interval in milliseconds - * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop - * @log_level: configured log level (see batadv_dbg_level) - * @isolation_mark: the skb->mark value used to match packets for AP isolation - * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be used - * for the isolation mark - * @bcast_seqno: last sent broadcast packet sequence number - * @bcast_queue_left: number of remaining buffered broadcast packet slots - * @batman_queue_left: number of remaining OGM packet slots - * @num_ifaces: number of interfaces assigned to this mesh interface - * @mesh_obj: kobject for sysfs mesh subdirectory - * @debug_dir: dentry for debugfs batman-adv subdirectory - * @forw_bat_list: list of aggregated OGMs that will be forwarded - * @forw_bcast_list: list of broadcast packets that will be rebroadcasted - * @tp_list: list of tp sessions - * @tp_num: number of currently active tp sessions - * @orig_hash: hash table containing mesh participants (orig nodes) - * @forw_bat_list_lock: lock protecting forw_bat_list - * @forw_bcast_list_lock: lock protecting forw_bcast_list - * @tp_list_lock: spinlock protecting @tp_list - * @orig_work: work queue callback item for orig node purging - * @primary_if: one of the hard-interfaces assigned to this mesh interface - * becomes the primary interface - * @algo_ops: routing algorithm used by this mesh interface - * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top - * of the mesh interface represented by this object - * @softif_vlan_list_lock: lock protecting softif_vlan_list - * @bla: bridge loope avoidance data - * @debug_log: holding debug logging relevant data - * @gw: gateway data - * @tt: translation table data - * @tvlv: type-version-length-value data - * @dat: distributed arp table data - * @mcast: multicast data - * @network_coding: bool indicating whether network coding is enabled - * @nc: network coding data - * @bat_v: B.A.T.M.A.N. V per soft-interface private data */ struct batadv_priv { + /** + * @mesh_state: current status of the mesh + * (inactive/active/deactivating) + */ atomic_t mesh_state; + + /** @soft_iface: net device which holds this struct as private data */ struct net_device *soft_iface; + + /** + * @bat_counters: mesh internal traffic statistic counters (see + * batadv_counters) + */ u64 __percpu *bat_counters; /* Per cpu counters */ + + /** + * @aggregated_ogms: bool indicating whether OGM aggregation is enabled + */ atomic_t aggregated_ogms; + + /** @bonding: bool indicating whether traffic bonding is enabled */ atomic_t bonding; + + /** + * @fragmentation: bool indicating whether traffic fragmentation is + * enabled + */ atomic_t fragmentation; + + /** + * @packet_size_max: max packet size that can be transmitted via + * multiple fragmented skbs or a single frame if fragmentation is + * disabled + */ atomic_t packet_size_max; + + /** + * @frag_seqno: incremental counter to identify chains of egress + * fragments + */ atomic_t frag_seqno; + #ifdef CONFIG_BATMAN_ADV_BLA + /** + * @bridge_loop_avoidance: bool indicating whether bridge loop + * avoidance is enabled + */ atomic_t bridge_loop_avoidance; #endif + #ifdef CONFIG_BATMAN_ADV_DAT + /** + * @distributed_arp_table: bool indicating whether distributed ARP table + * is enabled + */ atomic_t distributed_arp_table; #endif + #ifdef CONFIG_BATMAN_ADV_MCAST + /** + * @multicast_mode: Enable or disable multicast optimizations on this + * node's sender/originating side + */ atomic_t multicast_mode; #endif + + /** @orig_interval: OGM broadcast interval in milliseconds */ atomic_t orig_interval; + + /** + * @hop_penalty: penalty which will be applied to an OGM's tq-field on + * every hop + */ atomic_t hop_penalty; + #ifdef CONFIG_BATMAN_ADV_DEBUG + /** @log_level: configured log level (see batadv_dbg_level) */ atomic_t log_level; #endif + + /** + * @isolation_mark: the skb->mark value used to match packets for AP + * isolation + */ u32 isolation_mark; + + /** + * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be + * used for the isolation mark + */ u32 isolation_mark_mask; + + /** @bcast_seqno: last sent broadcast packet sequence number */ atomic_t bcast_seqno; + + /** + * @bcast_queue_left: number of remaining buffered broadcast packet + * slots + */ atomic_t bcast_queue_left; + + /** @batman_queue_left: number of remaining OGM packet slots */ atomic_t batman_queue_left; + + /** @num_ifaces: number of interfaces assigned to this mesh interface */ char num_ifaces; + + /** @mesh_obj: kobject for sysfs mesh subdirectory */ struct kobject *mesh_obj; + + /** @debug_dir: dentry for debugfs batman-adv subdirectory */ struct dentry *debug_dir; + + /** @forw_bat_list: list of aggregated OGMs that will be forwarded */ struct hlist_head forw_bat_list; + + /** + * @forw_bcast_list: list of broadcast packets that will be + * rebroadcasted + */ struct hlist_head forw_bcast_list; + + /** @tp_list: list of tp sessions */ struct hlist_head tp_list; + + /** @tp_num: number of currently active tp sessions */ struct batadv_hashtable *orig_hash; - spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ - spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */ - spinlock_t tp_list_lock; /* protects tp_list */ + + /** @orig_hash: hash table containing mesh participants (orig nodes) */ + spinlock_t forw_bat_list_lock; + + /** @forw_bat_list_lock: lock protecting forw_bat_list */ + spinlock_t forw_bcast_list_lock; + + /** @forw_bcast_list_lock: lock protecting forw_bcast_list */ + spinlock_t tp_list_lock; + + /** @tp_list_lock: spinlock protecting @tp_list */ atomic_t tp_num; + + /** @orig_work: work queue callback item for orig node purging */ struct delayed_work orig_work; + + /** + * @primary_if: one of the hard-interfaces assigned to this mesh + * interface becomes the primary interface + */ struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ + + /** @algo_ops: routing algorithm used by this mesh interface */ struct batadv_algo_ops *algo_ops; + + /** + * @softif_vlan_list: a list of softif_vlan structs, one per VLAN + * created on top of the mesh interface represented by this object + */ struct hlist_head softif_vlan_list; - spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */ + + /** @softif_vlan_list_lock: lock protecting softif_vlan_list */ + spinlock_t softif_vlan_list_lock; + #ifdef CONFIG_BATMAN_ADV_BLA + /** @bla: bridge loope avoidance data */ struct batadv_priv_bla bla; #endif + #ifdef CONFIG_BATMAN_ADV_DEBUG + /** @debug_log: holding debug logging relevant data */ struct batadv_priv_debug_log *debug_log; #endif + + /** @gw: gateway data */ struct batadv_priv_gw gw; + + /** @tt: translation table data */ struct batadv_priv_tt tt; + + /** @tvlv: type-version-length-value data */ struct batadv_priv_tvlv tvlv; + #ifdef CONFIG_BATMAN_ADV_DAT + /** @dat: distributed arp table data */ struct batadv_priv_dat dat; #endif + #ifdef CONFIG_BATMAN_ADV_MCAST + /** @mcast: multicast data */ struct batadv_priv_mcast mcast; #endif + #ifdef CONFIG_BATMAN_ADV_NC + /** + * @network_coding: bool indicating whether network coding is enabled + */ atomic_t network_coding; + + /** @nc: network coding data */ struct batadv_priv_nc nc; #endif /* CONFIG_BATMAN_ADV_NC */ + #ifdef CONFIG_BATMAN_ADV_BATMAN_V + /** @bat_v: B.A.T.M.A.N. V per soft-interface private data */ struct batadv_priv_bat_v bat_v; #endif }; /** * struct batadv_socket_client - layer2 icmp socket client data - * @queue_list: packet queue for packets destined for this socket client - * @queue_len: number of packets in the packet queue (queue_list) - * @index: socket client's index in the batadv_socket_client_hash - * @lock: lock protecting queue_list, queue_len & index - * @queue_wait: socket client's wait queue - * @bat_priv: pointer to soft_iface this client belongs to */ struct batadv_socket_client { + /** + * @queue_list: packet queue for packets destined for this socket client + */ struct list_head queue_list; + + /** @queue_len: number of packets in the packet queue (queue_list) */ unsigned int queue_len; + + /** @index: socket client's index in the batadv_socket_client_hash */ unsigned char index; - spinlock_t lock; /* protects queue_list, queue_len & index */ + + /** @lock: lock protecting queue_list, queue_len & index */ + spinlock_t lock; + + /** @queue_wait: socket client's wait queue */ wait_queue_head_t queue_wait; + + /** @bat_priv: pointer to soft_iface this client belongs to */ struct batadv_priv *bat_priv; }; /** * struct batadv_socket_packet - layer2 icmp packet for socket client - * @list: list node for batadv_socket_client::queue_list - * @icmp_len: size of the layer2 icmp packet - * @icmp_packet: layer2 icmp packet */ struct batadv_socket_packet { + /** @list: list node for &batadv_socket_client.queue_list */ struct list_head list; + + /** @icmp_len: size of the layer2 icmp packet */ size_t icmp_len; + + /** @icmp_packet: layer2 icmp packet */ u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE]; }; @@ -1153,312 +1740,432 @@ struct batadv_socket_packet { /** * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN - * @orig: originator address of backbone node (mac address of primary iface) - * @vid: vlan id this gateway was detected on - * @hash_entry: hlist node for batadv_priv_bla::backbone_hash - * @bat_priv: pointer to soft_iface this backbone gateway belongs to - * @lasttime: last time we heard of this backbone gw - * @wait_periods: grace time for bridge forward delays and bla group forming at - * bootup phase - no bcast traffic is formwared until it has elapsed - * @request_sent: if this bool is set to true we are out of sync with this - * backbone gateway - no bcast traffic is formwared until the situation was - * resolved - * @crc: crc16 checksum over all claims - * @crc_lock: lock protecting crc - * @report_work: work struct for reporting detected loops - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_bla_backbone_gw { + /** + * @orig: originator address of backbone node (mac address of primary + * iface) + */ u8 orig[ETH_ALEN]; + + /** @vid: vlan id this gateway was detected on */ unsigned short vid; + + /** @hash_entry: hlist node for &batadv_priv_bla.backbone_hash */ struct hlist_node hash_entry; + + /** @bat_priv: pointer to soft_iface this backbone gateway belongs to */ struct batadv_priv *bat_priv; + + /** @lasttime: last time we heard of this backbone gw */ unsigned long lasttime; + + /** + * @wait_periods: grace time for bridge forward delays and bla group + * forming at bootup phase - no bcast traffic is formwared until it has + * elapsed + */ atomic_t wait_periods; + + /** + * @request_sent: if this bool is set to true we are out of sync with + * this backbone gateway - no bcast traffic is formwared until the + * situation was resolved + */ atomic_t request_sent; + + /** @crc: crc16 checksum over all claims */ u16 crc; - spinlock_t crc_lock; /* protects crc */ + + /** @crc_lock: lock protecting crc */ + spinlock_t crc_lock; + + /** @report_work: work struct for reporting detected loops */ struct work_struct report_work; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_bla_claim - claimed non-mesh client structure - * @addr: mac address of claimed non-mesh client - * @vid: vlan id this client was detected on - * @backbone_gw: pointer to backbone gw claiming this client - * @backbone_lock: lock protecting backbone_gw pointer - * @lasttime: last time we heard of claim (locals only) - * @hash_entry: hlist node for batadv_priv_bla::claim_hash - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_bla_claim { + /** @addr: mac address of claimed non-mesh client */ u8 addr[ETH_ALEN]; + + /** @vid: vlan id this client was detected on */ unsigned short vid; + + /** @backbone_gw: pointer to backbone gw claiming this client */ struct batadv_bla_backbone_gw *backbone_gw; - spinlock_t backbone_lock; /* protects backbone_gw */ + + /** @backbone_lock: lock protecting backbone_gw pointer */ + spinlock_t backbone_lock; + + /** @lasttime: last time we heard of claim (locals only) */ unsigned long lasttime; + + /** @hash_entry: hlist node for &batadv_priv_bla.claim_hash */ struct hlist_node hash_entry; + + /** @refcount: number of contexts the object is used */ struct rcu_head rcu; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct kref refcount; }; #endif /** * struct batadv_tt_common_entry - tt local & tt global common data - * @addr: mac address of non-mesh client - * @vid: VLAN identifier - * @hash_entry: hlist node for batadv_priv_tt::local_hash or for - * batadv_priv_tt::global_hash - * @flags: various state handling flags (see batadv_tt_client_flags) - * @added_at: timestamp used for purging stale tt common entries - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_tt_common_entry { + /** @addr: mac address of non-mesh client */ u8 addr[ETH_ALEN]; + + /** @vid: VLAN identifier */ unsigned short vid; + + /** + * @hash_entry: hlist node for &batadv_priv_tt.local_hash or for + * &batadv_priv_tt.global_hash + */ struct hlist_node hash_entry; + + /** @flags: various state handling flags (see batadv_tt_client_flags) */ u16 flags; + + /** @added_at: timestamp used for purging stale tt common entries */ unsigned long added_at; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_tt_local_entry - translation table local entry data - * @common: general translation table data - * @last_seen: timestamp used for purging stale tt local entries - * @vlan: soft-interface vlan of the entry */ struct batadv_tt_local_entry { + /** @common: general translation table data */ struct batadv_tt_common_entry common; + + /** @last_seen: timestamp used for purging stale tt local entries */ unsigned long last_seen; + + /** @vlan: soft-interface vlan of the entry */ struct batadv_softif_vlan *vlan; }; /** * struct batadv_tt_global_entry - translation table global entry data - * @common: general translation table data - * @orig_list: list of orig nodes announcing this non-mesh client - * @orig_list_count: number of items in the orig_list - * @list_lock: lock protecting orig_list - * @roam_at: time at which TT_GLOBAL_ROAM was set */ struct batadv_tt_global_entry { + /** @common: general translation table data */ struct batadv_tt_common_entry common; + + /** @orig_list: list of orig nodes announcing this non-mesh client */ struct hlist_head orig_list; + + /** @orig_list_count: number of items in the orig_list */ atomic_t orig_list_count; - spinlock_t list_lock; /* protects orig_list */ + + /** @list_lock: lock protecting orig_list */ + spinlock_t list_lock; + + /** @roam_at: time at which TT_GLOBAL_ROAM was set */ unsigned long roam_at; }; /** * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client - * @orig_node: pointer to orig node announcing this non-mesh client - * @ttvn: translation table version number which added the non-mesh client - * @flags: per orig entry TT sync flags - * @list: list node for batadv_tt_global_entry::orig_list - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_tt_orig_list_entry { + /** @orig_node: pointer to orig node announcing this non-mesh client */ struct batadv_orig_node *orig_node; + + /** + * @ttvn: translation table version number which added the non-mesh + * client + */ u8 ttvn; + + /** @flags: per orig entry TT sync flags */ u8 flags; + + /** @list: list node for &batadv_tt_global_entry.orig_list */ struct hlist_node list; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_tt_change_node - structure for tt changes occurred - * @list: list node for batadv_priv_tt::changes_list - * @change: holds the actual translation table diff data */ struct batadv_tt_change_node { + /** @list: list node for &batadv_priv_tt.changes_list */ struct list_head list; + + /** @change: holds the actual translation table diff data */ struct batadv_tvlv_tt_change change; }; /** * struct batadv_tt_req_node - data to keep track of the tt requests in flight - * @addr: mac address address of the originator this request was sent to - * @issued_at: timestamp used for purging stale tt requests - * @refcount: number of contexts the object is used by - * @list: list node for batadv_priv_tt::req_list */ struct batadv_tt_req_node { + /** + * @addr: mac address address of the originator this request was sent to + */ u8 addr[ETH_ALEN]; + + /** @issued_at: timestamp used for purging stale tt requests */ unsigned long issued_at; + + /** @refcount: number of contexts the object is used by */ struct kref refcount; + + /** @list: list node for &batadv_priv_tt.req_list */ struct hlist_node list; }; /** * struct batadv_tt_roam_node - roaming client data - * @addr: mac address of the client in the roaming phase - * @counter: number of allowed roaming events per client within a single - * OGM interval (changes are committed with each OGM) - * @first_time: timestamp used for purging stale roaming node entries - * @list: list node for batadv_priv_tt::roam_list */ struct batadv_tt_roam_node { + /** @addr: mac address of the client in the roaming phase */ u8 addr[ETH_ALEN]; + + /** + * @counter: number of allowed roaming events per client within a single + * OGM interval (changes are committed with each OGM) + */ atomic_t counter; + + /** + * @first_time: timestamp used for purging stale roaming node entries + */ unsigned long first_time; + + /** @list: list node for &batadv_priv_tt.roam_list */ struct list_head list; }; /** * struct batadv_nc_node - network coding node - * @list: next and prev pointer for the list handling - * @addr: the node's mac address - * @refcount: number of contexts the object is used by - * @rcu: struct used for freeing in an RCU-safe manner - * @orig_node: pointer to corresponding orig node struct - * @last_seen: timestamp of last ogm received from this node */ struct batadv_nc_node { + /** @list: next and prev pointer for the list handling */ struct list_head list; + + /** @addr: the node's mac address */ u8 addr[ETH_ALEN]; + + /** @refcount: number of contexts the object is used by */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; + + /** @orig_node: pointer to corresponding orig node struct */ struct batadv_orig_node *orig_node; + + /** @last_seen: timestamp of last ogm received from this node */ unsigned long last_seen; }; /** * struct batadv_nc_path - network coding path - * @hash_entry: next and prev pointer for the list handling - * @rcu: struct used for freeing in an RCU-safe manner - * @refcount: number of contexts the object is used by - * @packet_list: list of buffered packets for this path - * @packet_list_lock: access lock for packet list - * @next_hop: next hop (destination) of path - * @prev_hop: previous hop (source) of path - * @last_valid: timestamp for last validation of path */ struct batadv_nc_path { + /** @hash_entry: next and prev pointer for the list handling */ struct hlist_node hash_entry; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; + + /** @refcount: number of contexts the object is used by */ struct kref refcount; + + /** @packet_list: list of buffered packets for this path */ struct list_head packet_list; - spinlock_t packet_list_lock; /* Protects packet_list */ + + /** @packet_list_lock: access lock for packet list */ + spinlock_t packet_list_lock; + + /** @next_hop: next hop (destination) of path */ u8 next_hop[ETH_ALEN]; + + /** @prev_hop: previous hop (source) of path */ u8 prev_hop[ETH_ALEN]; + + /** @last_valid: timestamp for last validation of path */ unsigned long last_valid; }; /** * struct batadv_nc_packet - network coding packet used when coding and * decoding packets - * @list: next and prev pointer for the list handling - * @packet_id: crc32 checksum of skb data - * @timestamp: field containing the info when the packet was added to path - * @neigh_node: pointer to original next hop neighbor of skb - * @skb: skb which can be encoded or used for decoding - * @nc_path: pointer to path this nc packet is attached to */ struct batadv_nc_packet { + /** @list: next and prev pointer for the list handling */ struct list_head list; + + /** @packet_id: crc32 checksum of skb data */ __be32 packet_id; + + /** + * @timestamp: field containing the info when the packet was added to + * path + */ unsigned long timestamp; + + /** @neigh_node: pointer to original next hop neighbor of skb */ struct batadv_neigh_node *neigh_node; + + /** @skb: skb which can be encoded or used for decoding */ struct sk_buff *skb; + + /** @nc_path: pointer to path this nc packet is attached to */ struct batadv_nc_path *nc_path; }; /** * struct batadv_skb_cb - control buffer structure used to store private data * relevant to batman-adv in the skb->cb buffer in skbs. - * @decoded: Marks a skb as decoded, which is checked when searching for coding - * opportunities in network-coding.c - * @num_bcasts: Counter for broadcast packet retransmissions */ struct batadv_skb_cb { + /** + * @decoded: Marks a skb as decoded, which is checked when searching for + * coding opportunities in network-coding.c + */ bool decoded; + + /** @num_bcasts: Counter for broadcast packet retransmissions */ unsigned int num_bcasts; }; /** * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded - * @list: list node for batadv_priv::forw_{bat,bcast}_list - * @cleanup_list: list node for purging functions - * @send_time: execution time for delayed_work (packet sending) - * @own: bool for locally generated packets (local OGMs are re-scheduled after - * sending) - * @skb: bcast packet's skb buffer - * @packet_len: size of aggregated OGM packet inside the skb buffer - * @direct_link_flags: direct link flags for aggregated OGM packets - * @num_packets: counter for aggregated OGMv1 packets - * @delayed_work: work queue callback item for packet sending - * @if_incoming: pointer to incoming hard-iface or primary iface if - * locally generated packet - * @if_outgoing: packet where the packet should be sent to, or NULL if - * unspecified - * @queue_left: The queue (counter) this packet was applied to */ struct batadv_forw_packet { + /** + * @list: list node for &batadv_priv.forw.bcast_list and + * &batadv_priv.forw.bat_list + */ struct hlist_node list; + + /** @cleanup_list: list node for purging functions */ struct hlist_node cleanup_list; + + /** @send_time: execution time for delayed_work (packet sending) */ unsigned long send_time; + + /** + * @own: bool for locally generated packets (local OGMs are re-scheduled + * after sending) + */ u8 own; + + /** @skb: bcast packet's skb buffer */ struct sk_buff *skb; + + /** @packet_len: size of aggregated OGM packet inside the skb buffer */ u16 packet_len; + + /** @direct_link_flags: direct link flags for aggregated OGM packets */ u32 direct_link_flags; + + /** @num_packets: counter for aggregated OGMv1 packets */ u8 num_packets; + + /** @delayed_work: work queue callback item for packet sending */ struct delayed_work delayed_work; + + /** + * @if_incoming: pointer to incoming hard-iface or primary iface if + * locally generated packet + */ struct batadv_hard_iface *if_incoming; + + /** + * @if_outgoing: packet where the packet should be sent to, or NULL if + * unspecified + */ struct batadv_hard_iface *if_outgoing; + + /** @queue_left: The queue (counter) this packet was applied to */ atomic_t *queue_left; }; /** * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific) - * @activate: start routing mechanisms when hard-interface is brought up - * (optional) - * @enable: init routing info when hard-interface is enabled - * @disable: de-init routing info when hard-interface is disabled - * @update_mac: (re-)init mac addresses of the protocol information - * belonging to this hard-interface - * @primary_set: called when primary interface is selected / changed */ struct batadv_algo_iface_ops { + /** + * @activate: start routing mechanisms when hard-interface is brought up + * (optional) + */ void (*activate)(struct batadv_hard_iface *hard_iface); + + /** @enable: init routing info when hard-interface is enabled */ int (*enable)(struct batadv_hard_iface *hard_iface); + + /** @disable: de-init routing info when hard-interface is disabled */ void (*disable)(struct batadv_hard_iface *hard_iface); + + /** + * @update_mac: (re-)init mac addresses of the protocol information + * belonging to this hard-interface + */ void (*update_mac)(struct batadv_hard_iface *hard_iface); + + /** @primary_set: called when primary interface is selected / changed */ void (*primary_set)(struct batadv_hard_iface *hard_iface); }; /** * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific) - * @hardif_init: called on creation of single hop entry - * (optional) - * @cmp: compare the metrics of two neighbors for their respective outgoing - * interfaces - * @is_similar_or_better: check if neigh1 is equally similar or better than - * neigh2 for their respective outgoing interface from the metric prospective - * @print: print the single hop neighbor list (optional) - * @dump: dump neighbors to a netlink socket (optional) */ struct batadv_algo_neigh_ops { + /** @hardif_init: called on creation of single hop entry (optional) */ void (*hardif_init)(struct batadv_hardif_neigh_node *neigh); + + /** + * @cmp: compare the metrics of two neighbors for their respective + * outgoing interfaces + */ int (*cmp)(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); + + /** + * @is_similar_or_better: check if neigh1 is equally similar or better + * than neigh2 for their respective outgoing interface from the metric + * prospective + */ bool (*is_similar_or_better)(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); + #ifdef CONFIG_BATMAN_ADV_DEBUGFS + /** @print: print the single hop neighbor list (optional) */ void (*print)(struct batadv_priv *priv, struct seq_file *seq); #endif + + /** @dump: dump neighbors to a netlink socket (optional) */ void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *priv, struct batadv_hard_iface *hard_iface); @@ -1466,24 +2173,36 @@ struct batadv_algo_neigh_ops { /** * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific) - * @free: free the resources allocated by the routing algorithm for an orig_node - * object (optional) - * @add_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to a new hard-interface being added into the mesh (optional) - * @del_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to an hard-interface being removed from the mesh (optional) - * @print: print the originator table (optional) - * @dump: dump originators to a netlink socket (optional) */ struct batadv_algo_orig_ops { + /** + * @free: free the resources allocated by the routing algorithm for an + * orig_node object (optional) + */ void (*free)(struct batadv_orig_node *orig_node); + + /** + * @add_if: ask the routing algorithm to apply the needed changes to the + * orig_node due to a new hard-interface being added into the mesh + * (optional) + */ int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); + + /** + * @del_if: ask the routing algorithm to apply the needed changes to the + * orig_node due to an hard-interface being removed from the mesh + * (optional) + */ int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, int del_if_num); + #ifdef CONFIG_BATMAN_ADV_DEBUGFS + /** @print: print the originator table (optional) */ void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); #endif + + /** @dump: dump originators to a netlink socket (optional) */ void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *priv, struct batadv_hard_iface *hard_iface); @@ -1491,158 +2210,213 @@ struct batadv_algo_orig_ops { /** * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) - * @init_sel_class: initialize GW selection class (optional) - * @store_sel_class: parse and stores a new GW selection class (optional) - * @show_sel_class: prints the current GW selection class (optional) - * @get_best_gw_node: select the best GW from the list of available nodes - * (optional) - * @is_eligible: check if a newly discovered GW is a potential candidate for - * the election as best GW (optional) - * @print: print the gateway table (optional) - * @dump: dump gateways to a netlink socket (optional) */ struct batadv_algo_gw_ops { + /** @init_sel_class: initialize GW selection class (optional) */ void (*init_sel_class)(struct batadv_priv *bat_priv); + + /** + * @store_sel_class: parse and stores a new GW selection class + * (optional) + */ ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, size_t count); + + /** @show_sel_class: prints the current GW selection class (optional) */ ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); + + /** + * @get_best_gw_node: select the best GW from the list of available + * nodes (optional) + */ struct batadv_gw_node *(*get_best_gw_node) (struct batadv_priv *bat_priv); + + /** + * @is_eligible: check if a newly discovered GW is a potential candidate + * for the election as best GW (optional) + */ bool (*is_eligible)(struct batadv_priv *bat_priv, struct batadv_orig_node *curr_gw_orig, struct batadv_orig_node *orig_node); + #ifdef CONFIG_BATMAN_ADV_DEBUGFS + /** @print: print the gateway table (optional) */ void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq); #endif + + /** @dump: dump gateways to a netlink socket (optional) */ void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *priv); }; /** * struct batadv_algo_ops - mesh algorithm callbacks - * @list: list node for the batadv_algo_list - * @name: name of the algorithm - * @iface: callbacks related to interface handling - * @neigh: callbacks related to neighbors handling - * @orig: callbacks related to originators handling - * @gw: callbacks related to GW mode */ struct batadv_algo_ops { + /** @list: list node for the batadv_algo_list */ struct hlist_node list; + + /** @name: name of the algorithm */ char *name; + + /** @iface: callbacks related to interface handling */ struct batadv_algo_iface_ops iface; + + /** @neigh: callbacks related to neighbors handling */ struct batadv_algo_neigh_ops neigh; + + /** @orig: callbacks related to originators handling */ struct batadv_algo_orig_ops orig; + + /** @gw: callbacks related to GW mode */ struct batadv_algo_gw_ops gw; }; /** * struct batadv_dat_entry - it is a single entry of batman-adv ARP backend. It * is used to stored ARP entries needed for the global DAT cache - * @ip: the IPv4 corresponding to this DAT/ARP entry - * @mac_addr: the MAC address associated to the stored IPv4 - * @vid: the vlan ID associated to this entry - * @last_update: time in jiffies when this entry was refreshed last time - * @hash_entry: hlist node for batadv_priv_dat::hash - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_dat_entry { + /** @ip: the IPv4 corresponding to this DAT/ARP entry */ __be32 ip; + + /** @mac_addr: the MAC address associated to the stored IPv4 */ u8 mac_addr[ETH_ALEN]; + + /** @vid: the vlan ID associated to this entry */ unsigned short vid; + + /** + * @last_update: time in jiffies when this entry was refreshed last time + */ unsigned long last_update; + + /** @hash_entry: hlist node for &batadv_priv_dat.hash */ struct hlist_node hash_entry; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; }; /** * struct batadv_hw_addr - a list entry for a MAC address - * @list: list node for the linking of entries - * @addr: the MAC address of this list entry */ struct batadv_hw_addr { + /** @list: list node for the linking of entries */ struct hlist_node list; + + /** @addr: the MAC address of this list entry */ unsigned char addr[ETH_ALEN]; }; /** * struct batadv_dat_candidate - candidate destination for DAT operations - * @type: the type of the selected candidate. It can one of the following: - * - BATADV_DAT_CANDIDATE_NOT_FOUND - * - BATADV_DAT_CANDIDATE_ORIG - * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to the - * corresponding originator node structure */ struct batadv_dat_candidate { + /** + * @type: the type of the selected candidate. It can one of the + * following: + * - BATADV_DAT_CANDIDATE_NOT_FOUND + * - BATADV_DAT_CANDIDATE_ORIG + */ int type; + + /** + * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to + * the corresponding originator node structure + */ struct batadv_orig_node *orig_node; }; /** * struct batadv_tvlv_container - container for tvlv appended to OGMs - * @list: hlist node for batadv_priv_tvlv::container_list - * @tvlv_hdr: tvlv header information needed to construct the tvlv - * @refcount: number of contexts the object is used */ struct batadv_tvlv_container { + /** @list: hlist node for &batadv_priv_tvlv.container_list */ struct hlist_node list; + + /** @tvlv_hdr: tvlv header information needed to construct the tvlv */ struct batadv_tvlv_hdr tvlv_hdr; + + /** @refcount: number of contexts the object is used */ struct kref refcount; }; /** * struct batadv_tvlv_handler - handler for specific tvlv type and version - * @list: hlist node for batadv_priv_tvlv::handler_list - * @ogm_handler: handler callback which is given the tvlv payload to process on - * incoming OGM packets - * @unicast_handler: handler callback which is given the tvlv payload to process - * on incoming unicast tvlv packets - * @type: tvlv type this handler feels responsible for - * @version: tvlv version this handler feels responsible for - * @flags: tvlv handler flags - * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_tvlv_handler { + /** @list: hlist node for &batadv_priv_tvlv.handler_list */ struct hlist_node list; + + /** + * @ogm_handler: handler callback which is given the tvlv payload to + * process on incoming OGM packets + */ void (*ogm_handler)(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 flags, void *tvlv_value, u16 tvlv_value_len); + + /** + * @unicast_handler: handler callback which is given the tvlv payload to + * process on incoming unicast tvlv packets + */ int (*unicast_handler)(struct batadv_priv *bat_priv, u8 *src, u8 *dst, void *tvlv_value, u16 tvlv_value_len); + + /** @type: tvlv type this handler feels responsible for */ u8 type; + + /** @version: tvlv version this handler feels responsible for */ u8 version; + + /** @flags: tvlv handler flags */ u8 flags; + + /** @refcount: number of contexts the object is used */ struct kref refcount; + + /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; }; /** * enum batadv_tvlv_handler_flags - tvlv handler flags definitions - * @BATADV_TVLV_HANDLER_OGM_CIFNOTFND: tvlv ogm processing function will call - * this handler even if its type was not found (with no data) - * @BATADV_TVLV_HANDLER_OGM_CALLED: interval tvlv handling flag - the API marks - * a handler as being called, so it won't be called if the - * BATADV_TVLV_HANDLER_OGM_CIFNOTFND flag was set */ enum batadv_tvlv_handler_flags { + /** + * @BATADV_TVLV_HANDLER_OGM_CIFNOTFND: tvlv ogm processing function + * will call this handler even if its type was not found (with no data) + */ BATADV_TVLV_HANDLER_OGM_CIFNOTFND = BIT(1), + + /** + * @BATADV_TVLV_HANDLER_OGM_CALLED: interval tvlv handling flag - the + * API marks a handler as being called, so it won't be called if the + * BATADV_TVLV_HANDLER_OGM_CIFNOTFND flag was set + */ BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), }; /** * struct batadv_store_mesh_work - Work queue item to detach add/del interface * from sysfs locks - * @net_dev: netdevice to add/remove to/from batman-adv soft-interface - * @soft_iface_name: name of soft-interface to modify - * @work: work queue item */ struct batadv_store_mesh_work { + /** + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + */ struct net_device *net_dev; + + /** @soft_iface_name: name of soft-interface to modify */ char soft_iface_name[IFNAMSIZ]; + + /** @work: work queue item */ struct work_struct work; }; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 671b907ba678..f897681780db 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -766,43 +766,39 @@ static int __init bt_init(void) return err; err = sock_register(&bt_sock_family_ops); - if (err < 0) { - bt_sysfs_cleanup(); - return err; - } + if (err) + goto cleanup_sysfs; BT_INFO("HCI device and connection manager initialized"); err = hci_sock_init(); - if (err < 0) - goto error; + if (err) + goto unregister_socket; err = l2cap_init(); - if (err < 0) - goto sock_err; + if (err) + goto cleanup_socket; err = sco_init(); - if (err < 0) { - l2cap_exit(); - goto sock_err; - } + if (err) + goto cleanup_cap; err = mgmt_init(); - if (err < 0) { - sco_exit(); - l2cap_exit(); - goto sock_err; - } + if (err) + goto cleanup_sco; return 0; -sock_err: +cleanup_sco: + sco_exit(); +cleanup_cap: + l2cap_exit(); +cleanup_socket: hci_sock_cleanup(); - -error: +unregister_socket: sock_unregister(PF_BLUETOOTH); +cleanup_sysfs: bt_sysfs_cleanup(); - return err; } diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index bb308224099c..426a92f02db4 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -527,7 +527,6 @@ static int cmtp_proc_open(struct inode *inode, struct file *file) } static const struct file_operations cmtp_proc_fops = { - .owner = THIS_MODULE, .open = cmtp_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 63df63ebfb24..57403bd567d0 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -88,6 +88,9 @@ static int __name ## _show(struct seq_file *f, void *ptr) \ return 0; \ } \ \ +DEFINE_SHOW_ATTRIBUTE(__name) + +#define DEFINE_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ return single_open(file, __name ## _show, inode->i_private); \ @@ -106,37 +109,16 @@ static int features_show(struct seq_file *f, void *ptr) u8 p; hci_dev_lock(hdev); - for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { - seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p, - hdev->features[p][0], hdev->features[p][1], - hdev->features[p][2], hdev->features[p][3], - hdev->features[p][4], hdev->features[p][5], - hdev->features[p][6], hdev->features[p][7]); - } + for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) + seq_printf(f, "%2u: %8ph\n", p, hdev->features[p]); if (lmp_le_capable(hdev)) - seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", - hdev->le_features[0], hdev->le_features[1], - hdev->le_features[2], hdev->le_features[3], - hdev->le_features[4], hdev->le_features[5], - hdev->le_features[6], hdev->le_features[7]); + seq_printf(f, "LE: %8ph\n", hdev->le_features); hci_dev_unlock(hdev); return 0; } -static int features_open(struct inode *inode, struct file *file) -{ - return single_open(file, features_show, inode->i_private); -} - -static const struct file_operations features_fops = { - .open = features_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(features); static int device_id_show(struct seq_file *f, void *ptr) { @@ -150,17 +132,7 @@ static int device_id_show(struct seq_file *f, void *ptr) return 0; } -static int device_id_open(struct inode *inode, struct file *file) -{ - return single_open(file, device_id_show, inode->i_private); -} - -static const struct file_operations device_id_fops = { - .open = device_id_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(device_id); static int device_list_show(struct seq_file *f, void *ptr) { @@ -180,17 +152,7 @@ static int device_list_show(struct seq_file *f, void *ptr) return 0; } -static int device_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, device_list_show, inode->i_private); -} - -static const struct file_operations device_list_fops = { - .open = device_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(device_list); static int blacklist_show(struct seq_file *f, void *p) { @@ -205,17 +167,7 @@ static int blacklist_show(struct seq_file *f, void *p) return 0; } -static int blacklist_open(struct inode *inode, struct file *file) -{ - return single_open(file, blacklist_show, inode->i_private); -} - -static const struct file_operations blacklist_fops = { - .open = blacklist_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(blacklist); static int uuids_show(struct seq_file *f, void *p) { @@ -240,17 +192,7 @@ static int uuids_show(struct seq_file *f, void *p) return 0; } -static int uuids_open(struct inode *inode, struct file *file) -{ - return single_open(file, uuids_show, inode->i_private); -} - -static const struct file_operations uuids_fops = { - .open = uuids_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(uuids); static int remote_oob_show(struct seq_file *f, void *ptr) { @@ -269,17 +211,7 @@ static int remote_oob_show(struct seq_file *f, void *ptr) return 0; } -static int remote_oob_open(struct inode *inode, struct file *file) -{ - return single_open(file, remote_oob_show, inode->i_private); -} - -static const struct file_operations remote_oob_fops = { - .open = remote_oob_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(remote_oob); static int conn_info_min_age_set(void *data, u64 val) { @@ -443,17 +375,7 @@ static int inquiry_cache_show(struct seq_file *f, void *p) return 0; } -static int inquiry_cache_open(struct inode *inode, struct file *file) -{ - return single_open(file, inquiry_cache_show, inode->i_private); -} - -static const struct file_operations inquiry_cache_fops = { - .open = inquiry_cache_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(inquiry_cache); static int link_keys_show(struct seq_file *f, void *ptr) { @@ -469,17 +391,7 @@ static int link_keys_show(struct seq_file *f, void *ptr) return 0; } -static int link_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, link_keys_show, inode->i_private); -} - -static const struct file_operations link_keys_fops = { - .open = link_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(link_keys); static int dev_class_show(struct seq_file *f, void *ptr) { @@ -493,17 +405,7 @@ static int dev_class_show(struct seq_file *f, void *ptr) return 0; } -static int dev_class_open(struct inode *inode, struct file *file) -{ - return single_open(file, dev_class_show, inode->i_private); -} - -static const struct file_operations dev_class_fops = { - .open = dev_class_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(dev_class); static int voice_setting_get(void *data, u64 *val) { @@ -692,17 +594,7 @@ static int identity_show(struct seq_file *f, void *p) return 0; } -static int identity_open(struct inode *inode, struct file *file) -{ - return single_open(file, identity_show, inode->i_private); -} - -static const struct file_operations identity_fops = { - .open = identity_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(identity); static int rpa_timeout_set(void *data, u64 val) { @@ -746,17 +638,7 @@ static int random_address_show(struct seq_file *f, void *p) return 0; } -static int random_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, random_address_show, inode->i_private); -} - -static const struct file_operations random_address_fops = { - .open = random_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(random_address); static int static_address_show(struct seq_file *f, void *p) { @@ -769,17 +651,7 @@ static int static_address_show(struct seq_file *f, void *p) return 0; } -static int static_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, static_address_show, inode->i_private); -} - -static const struct file_operations static_address_fops = { - .open = static_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(static_address); static ssize_t force_static_address_read(struct file *file, char __user *user_buf, @@ -841,17 +713,7 @@ static int white_list_show(struct seq_file *f, void *ptr) return 0; } -static int white_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, white_list_show, inode->i_private); -} - -static const struct file_operations white_list_fops = { - .open = white_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(white_list); static int identity_resolving_keys_show(struct seq_file *f, void *ptr) { @@ -869,18 +731,7 @@ static int identity_resolving_keys_show(struct seq_file *f, void *ptr) return 0; } -static int identity_resolving_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, identity_resolving_keys_show, - inode->i_private); -} - -static const struct file_operations identity_resolving_keys_fops = { - .open = identity_resolving_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(identity_resolving_keys); static int long_term_keys_show(struct seq_file *f, void *ptr) { @@ -898,17 +749,7 @@ static int long_term_keys_show(struct seq_file *f, void *ptr) return 0; } -static int long_term_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, long_term_keys_show, inode->i_private); -} - -static const struct file_operations long_term_keys_fops = { - .open = long_term_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(long_term_keys); static int conn_min_interval_set(void *data, u64 val) { diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index abc0f3224dd1..3394e6791673 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -919,6 +919,43 @@ static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags) return true; } +static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) +{ + /* If there is no connection we are OK to advertise. */ + if (hci_conn_num(hdev, LE_LINK) == 0) + return true; + + /* Check le_states if there is any connection in slave role. */ + if (hdev->conn_hash.le_num_slave > 0) { + /* Slave connection state and non connectable mode bit 20. */ + if (!connectable && !(hdev->le_states[2] & 0x10)) + return false; + + /* Slave connection state and connectable mode bit 38 + * and scannable bit 21. + */ + if (connectable && (!(hdev->le_states[4] & 0x01) || + !(hdev->le_states[2] & 0x40))) + return false; + } + + /* Check le_states if there is any connection in master role. */ + if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_slave) { + /* Master connection state and non connectable mode bit 18. */ + if (!connectable && !(hdev->le_states[2] & 0x02)) + return false; + + /* Master connection state and connectable mode bit 35 and + * scannable 19. + */ + if (connectable && (!(hdev->le_states[4] & 0x10) || + !(hdev->le_states[2] & 0x08))) + return false; + } + + return true; +} + void __hci_req_enable_advertising(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -927,7 +964,15 @@ void __hci_req_enable_advertising(struct hci_request *req) bool connectable; u32 flags; - if (hci_conn_num(hdev, LE_LINK) > 0) + flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance); + + /* If the "connectable" instance flag was not set, then choose between + * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. + */ + connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || + mgmt_get_connectable(hdev); + + if (!is_advertising_allowed(hdev, connectable)) return; if (hci_dev_test_flag(hdev, HCI_LE_ADV)) @@ -940,14 +985,6 @@ void __hci_req_enable_advertising(struct hci_request *req) */ hci_dev_clear_flag(hdev, HCI_LE_ADV); - flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance); - - /* If the "connectable" instance flag was not set, then choose between - * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. - */ - connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || - mgmt_get_connectable(hdev); - /* Set require_privacy to true only when non-connectable * advertising is used. In that case it is fine to use a * non-resolvable private address. @@ -1985,13 +2022,6 @@ unlock: hci_dev_unlock(hdev); } -static void disable_advertising(struct hci_request *req) -{ - u8 enable = 0x00; - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); -} - static int active_scan(struct hci_request *req, unsigned long opt) { uint16_t interval = opt; @@ -2017,7 +2047,7 @@ static int active_scan(struct hci_request *req, unsigned long opt) cancel_adv_timeout(hdev); hci_dev_unlock(hdev); - disable_advertising(req); + __hci_req_disable_advertising(req); } /* If controller is scanning, it means the background scanning is diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index af5b8c87f590..1285ca30ab0a 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -125,9 +125,16 @@ static int br_dev_init(struct net_device *dev) if (!br->stats) return -ENOMEM; + err = br_fdb_hash_init(br); + if (err) { + free_percpu(br->stats); + return err; + } + err = br_vlan_init(br); if (err) { free_percpu(br->stats); + br_fdb_hash_fini(br); return err; } @@ -135,6 +142,7 @@ static int br_dev_init(struct net_device *dev) if (err) { free_percpu(br->stats); br_vlan_flush(br); + br_fdb_hash_fini(br); } br_set_lockdep_class(dev); @@ -148,6 +156,7 @@ static void br_dev_uninit(struct net_device *dev) br_multicast_dev_del(br); br_multicast_uninit_stats(br); br_vlan_flush(br); + br_fdb_hash_fini(br); free_percpu(br->stats); } @@ -416,6 +425,7 @@ void br_dev_setup(struct net_device *dev) br->dev = dev; spin_lock_init(&br->lock); INIT_LIST_HEAD(&br->port_list); + INIT_HLIST_HEAD(&br->fdb_list); spin_lock_init(&br->hash_lock); br->bridge_id.prio[0] = 0x80; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 4ea5c8bbe286..dc87fbc9a23b 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -28,14 +28,20 @@ #include <trace/events/bridge.h> #include "br_private.h" +static const struct rhashtable_params br_fdb_rht_params = { + .head_offset = offsetof(struct net_bridge_fdb_entry, rhnode), + .key_offset = offsetof(struct net_bridge_fdb_entry, key), + .key_len = sizeof(struct net_bridge_fdb_key), + .automatic_shrinking = true, + .locks_mul = 1, +}; + static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *, int); -static u32 fdb_salt __read_mostly; - int __init br_fdb_init(void) { br_fdb_cache = kmem_cache_create("bridge_fdb_cache", @@ -45,7 +51,6 @@ int __init br_fdb_init(void) if (!br_fdb_cache) return -ENOMEM; - get_random_bytes(&fdb_salt, sizeof(fdb_salt)); return 0; } @@ -54,6 +59,15 @@ void br_fdb_fini(void) kmem_cache_destroy(br_fdb_cache); } +int br_fdb_hash_init(struct net_bridge *br) +{ + return rhashtable_init(&br->fdb_hash_tbl, &br_fdb_rht_params); +} + +void br_fdb_hash_fini(struct net_bridge *br) +{ + rhashtable_destroy(&br->fdb_hash_tbl); +} /* if topology_changing then use forward_delay (default 15 sec) * otherwise keep longer (default 5 minutes) @@ -70,13 +84,6 @@ static inline int has_expired(const struct net_bridge *br, time_before_eq(fdb->updated + hold_time(br), jiffies); } -static inline int br_mac_hash(const unsigned char *mac, __u16 vid) -{ - /* use 1 byte of OUI and 3 bytes of NIC */ - u32 key = get_unaligned((u32 *)(mac + 2)); - return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1); -} - static void fdb_rcu_free(struct rcu_head *head) { struct net_bridge_fdb_entry *ent @@ -84,19 +91,18 @@ static void fdb_rcu_free(struct rcu_head *head) kmem_cache_free(br_fdb_cache, ent); } -static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, +static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl, const unsigned char *addr, __u16 vid) { - struct net_bridge_fdb_entry *f; + struct net_bridge_fdb_key key; WARN_ON_ONCE(!rcu_read_lock_held()); - hlist_for_each_entry_rcu(f, head, hlist) - if (ether_addr_equal(f->addr.addr, addr) && f->vlan_id == vid) - break; + key.vlan_id = vid; + memcpy(key.addr.addr, addr, sizeof(key.addr.addr)); - return f; + return rhashtable_lookup(tbl, &key, br_fdb_rht_params); } /* requires bridge hash_lock */ @@ -104,13 +110,12 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, const unsigned char *addr, __u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; lockdep_assert_held_once(&br->hash_lock); rcu_read_lock(); - fdb = fdb_find_rcu(head, addr, vid); + fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); rcu_read_unlock(); return fdb; @@ -120,9 +125,7 @@ struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, const unsigned char *addr, __u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; - - return fdb_find_rcu(head, addr, vid); + return fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); } /* When a static FDB entry is added, the mac address from the entry is @@ -175,9 +178,11 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) trace_fdb_delete(br, f); if (f->is_static) - fdb_del_hw_addr(br, f->addr.addr); + fdb_del_hw_addr(br, f->key.addr.addr); - hlist_del_init_rcu(&f->hlist); + hlist_del_init_rcu(&f->fdb_node); + rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode, + br_fdb_rht_params); fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); } @@ -187,11 +192,11 @@ static void fdb_delete_local(struct net_bridge *br, const struct net_bridge_port *p, struct net_bridge_fdb_entry *f) { - const unsigned char *addr = f->addr.addr; + const unsigned char *addr = f->key.addr.addr; struct net_bridge_vlan_group *vg; const struct net_bridge_vlan *v; struct net_bridge_port *op; - u16 vid = f->vlan_id; + u16 vid = f->key.vlan_id; /* Maybe another port has same hw addr? */ list_for_each_entry(op, &br->port_list, list) { @@ -233,31 +238,23 @@ void br_fdb_find_delete_local(struct net_bridge *br, void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { struct net_bridge_vlan_group *vg; + struct net_bridge_fdb_entry *f; struct net_bridge *br = p->br; struct net_bridge_vlan *v; - int i; spin_lock_bh(&br->hash_lock); - vg = nbp_vlan_group(p); - /* Search all chains since old address/hash is unknown */ - for (i = 0; i < BR_HASH_SIZE; i++) { - struct hlist_node *h; - hlist_for_each(h, &br->hash[i]) { - struct net_bridge_fdb_entry *f; - - f = hlist_entry(h, struct net_bridge_fdb_entry, hlist); - if (f->dst == p && f->is_local && !f->added_by_user) { - /* delete old one */ - fdb_delete_local(br, p, f); - - /* if this port has no vlan information - * configured, we can safely be done at - * this point. - */ - if (!vg || !vg->num_vlans) - goto insert; - } + hlist_for_each_entry(f, &br->fdb_list, fdb_node) { + if (f->dst == p && f->is_local && !f->added_by_user) { + /* delete old one */ + fdb_delete_local(br, p, f); + + /* if this port has no vlan information + * configured, we can safely be done at + * this point. + */ + if (!vg || !vg->num_vlans) + goto insert; } } @@ -316,35 +313,32 @@ void br_fdb_cleanup(struct work_struct *work) { struct net_bridge *br = container_of(work, struct net_bridge, gc_work.work); + struct net_bridge_fdb_entry *f = NULL; unsigned long delay = hold_time(br); unsigned long work_delay = delay; unsigned long now = jiffies; - int i; - for (i = 0; i < BR_HASH_SIZE; i++) { - struct net_bridge_fdb_entry *f; - struct hlist_node *n; + /* this part is tricky, in order to avoid blocking learning and + * consequently forwarding, we rely on rcu to delete objects with + * delayed freeing allowing us to continue traversing + */ + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + unsigned long this_timer; - if (!br->hash[i].first) + if (f->is_static || f->added_by_external_learn) continue; - - spin_lock_bh(&br->hash_lock); - hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { - unsigned long this_timer; - - if (f->is_static) - continue; - if (f->added_by_external_learn) - continue; - this_timer = f->updated + delay; - if (time_after(this_timer, now)) - work_delay = min(work_delay, this_timer - now); - else + this_timer = f->updated + delay; + if (time_after(this_timer, now)) { + work_delay = min(work_delay, this_timer - now); + } else { + spin_lock_bh(&br->hash_lock); + if (!hlist_unhashed(&f->fdb_node)) fdb_delete(br, f); + spin_unlock_bh(&br->hash_lock); } - spin_unlock_bh(&br->hash_lock); - cond_resched(); } + rcu_read_unlock(); /* Cleanup minimum 10 milliseconds apart */ work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10)); @@ -354,16 +348,13 @@ void br_fdb_cleanup(struct work_struct *work) /* Completely flush all dynamic entries in forwarding database.*/ void br_fdb_flush(struct net_bridge *br) { - int i; + struct net_bridge_fdb_entry *f; + struct hlist_node *tmp; spin_lock_bh(&br->hash_lock); - for (i = 0; i < BR_HASH_SIZE; i++) { - struct net_bridge_fdb_entry *f; - struct hlist_node *n; - hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { - if (!f->is_static) - fdb_delete(br, f); - } + hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { + if (!f->is_static) + fdb_delete(br, f); } spin_unlock_bh(&br->hash_lock); } @@ -377,27 +368,22 @@ void br_fdb_delete_by_port(struct net_bridge *br, u16 vid, int do_all) { - int i; + struct net_bridge_fdb_entry *f; + struct hlist_node *tmp; spin_lock_bh(&br->hash_lock); - for (i = 0; i < BR_HASH_SIZE; i++) { - struct hlist_node *h, *g; + hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { + if (f->dst != p) + continue; - hlist_for_each_safe(h, g, &br->hash[i]) { - struct net_bridge_fdb_entry *f - = hlist_entry(h, struct net_bridge_fdb_entry, hlist); - if (f->dst != p) + if (!do_all) + if (f->is_static || (vid && f->key.vlan_id != vid)) continue; - if (!do_all) - if (f->is_static || (vid && f->vlan_id != vid)) - continue; - - if (f->is_local) - fdb_delete_local(br, p, f); - else - fdb_delete(br, f); - } + if (f->is_local) + fdb_delete_local(br, p, f); + else + fdb_delete(br, f); } spin_unlock_bh(&br->hash_lock); } @@ -433,52 +419,48 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long maxnum, unsigned long skip) { - struct __fdb_entry *fe = buf; - int i, num = 0; struct net_bridge_fdb_entry *f; + struct __fdb_entry *fe = buf; + int num = 0; memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); rcu_read_lock(); - for (i = 0; i < BR_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { - if (num >= maxnum) - goto out; + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + if (num >= maxnum) + break; - if (has_expired(br, f)) - continue; + if (has_expired(br, f)) + continue; - /* ignore pseudo entry for local MAC address */ - if (!f->dst) - continue; + /* ignore pseudo entry for local MAC address */ + if (!f->dst) + continue; - if (skip) { - --skip; - continue; - } + if (skip) { + --skip; + continue; + } - /* convert from internal format to API */ - memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN); + /* convert from internal format to API */ + memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN); - /* due to ABI compat need to split into hi/lo */ - fe->port_no = f->dst->port_no; - fe->port_hi = f->dst->port_no >> 8; + /* due to ABI compat need to split into hi/lo */ + fe->port_no = f->dst->port_no; + fe->port_hi = f->dst->port_no >> 8; - fe->is_local = f->is_local; - if (!f->is_static) - fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); - ++fe; - ++num; - } + fe->is_local = f->is_local; + if (!f->is_static) + fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); + ++fe; + ++num; } - - out: rcu_read_unlock(); return num; } -static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, +static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, __u16 vid, @@ -489,16 +471,23 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); if (fdb) { - memcpy(fdb->addr.addr, addr, ETH_ALEN); + memcpy(fdb->key.addr.addr, addr, ETH_ALEN); fdb->dst = source; - fdb->vlan_id = vid; + fdb->key.vlan_id = vid; fdb->is_local = is_local; fdb->is_static = is_static; fdb->added_by_user = 0; fdb->added_by_external_learn = 0; fdb->offloaded = 0; fdb->updated = fdb->used = jiffies; - hlist_add_head_rcu(&fdb->hlist, head); + if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, + &fdb->rhnode, + br_fdb_rht_params)) { + kmem_cache_free(br_fdb_cache, fdb); + fdb = NULL; + } else { + hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list); + } } return fdb; } @@ -506,7 +495,6 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; if (!is_valid_ether_addr(addr)) @@ -524,7 +512,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, fdb_delete(br, fdb); } - fdb = fdb_create(head, source, addr, vid, 1, 1); + fdb = fdb_create(br, source, addr, vid, 1, 1); if (!fdb) return -ENOMEM; @@ -548,7 +536,6 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid, bool added_by_user) { - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; bool fdb_modified = false; @@ -561,7 +548,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, source->state == BR_STATE_FORWARDING)) return; - fdb = fdb_find_rcu(head, addr, vid); + fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); if (likely(fdb)) { /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { @@ -590,14 +577,13 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } else { spin_lock(&br->hash_lock); - if (likely(!fdb_find_rcu(head, addr, vid))) { - fdb = fdb_create(head, source, addr, vid, 0, 0); - if (fdb) { - if (unlikely(added_by_user)) - fdb->added_by_user = 1; - trace_br_fdb_update(br, source, addr, vid, added_by_user); - fdb_notify(br, fdb, RTM_NEWNEIGH); - } + fdb = fdb_create(br, source, addr, vid, 0, 0); + if (fdb) { + if (unlikely(added_by_user)) + fdb->added_by_user = 1; + trace_br_fdb_update(br, source, addr, vid, + added_by_user); + fdb_notify(br, fdb, RTM_NEWNEIGH); } /* else we lose race and someone else inserts * it first, don't bother updating @@ -646,7 +632,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (fdb->added_by_external_learn) ndm->ndm_flags |= NTF_EXT_LEARNED; - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr)) goto nla_put_failure; if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) goto nla_put_failure; @@ -657,7 +643,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), + &fdb->key.vlan_id)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -711,54 +698,48 @@ int br_fdb_dump(struct sk_buff *skb, int *idx) { struct net_bridge *br = netdev_priv(dev); + struct net_bridge_fdb_entry *f; int err = 0; - int i; if (!(dev->priv_flags & IFF_EBRIDGE)) - goto out; + return err; if (!filter_dev) { err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (err < 0) - goto out; + return err; } - for (i = 0; i < BR_HASH_SIZE; i++) { - struct net_bridge_fdb_entry *f; - - hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { - - if (*idx < cb->args[2]) + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + if (*idx < cb->args[2]) + goto skip; + if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) { + if (filter_dev != dev) goto skip; - - if (filter_dev && - (!f->dst || f->dst->dev != filter_dev)) { - if (filter_dev != dev) - goto skip; - /* !f->dst is a special case for bridge - * It means the MAC belongs to the bridge - * Therefore need a little more filtering - * we only want to dump the !f->dst case - */ - if (f->dst) - goto skip; - } - if (!filter_dev && f->dst) + /* !f->dst is a special case for bridge + * It means the MAC belongs to the bridge + * Therefore need a little more filtering + * we only want to dump the !f->dst case + */ + if (f->dst) goto skip; - - err = fdb_fill_info(skb, br, f, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, - NLM_F_MULTI); - if (err < 0) - goto out; -skip: - *idx += 1; } + if (!filter_dev && f->dst) + goto skip; + + err = fdb_fill_info(skb, br, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI); + if (err < 0) + break; +skip: + *idx += 1; } + rcu_read_unlock(); -out: return err; } @@ -766,7 +747,6 @@ out: static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, const __u8 *addr, __u16 state, __u16 flags, __u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; bool modified = false; @@ -787,7 +767,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(head, source, addr, vid, 0, 0); + fdb = fdb_create(br, source, addr, vid, 0, 0); if (!fdb) return -ENOMEM; @@ -1012,65 +992,60 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p) { - struct net_bridge_fdb_entry *fdb, *tmp; - int i; + struct net_bridge_fdb_entry *f, *tmp; int err; ASSERT_RTNL(); - for (i = 0; i < BR_HASH_SIZE; i++) { - hlist_for_each_entry(fdb, &br->hash[i], hlist) { - /* We only care for static entries */ - if (!fdb->is_static) - continue; - - err = dev_uc_add(p->dev, fdb->addr.addr); - if (err) - goto rollback; - } + /* the key here is that static entries change only under rtnl */ + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + /* We only care for static entries */ + if (!f->is_static) + continue; + err = dev_uc_add(p->dev, f->key.addr.addr); + if (err) + goto rollback; } - return 0; +done: + rcu_read_unlock(); -rollback: - for (i = 0; i < BR_HASH_SIZE; i++) { - hlist_for_each_entry(tmp, &br->hash[i], hlist) { - /* If we reached the fdb that failed, we can stop */ - if (tmp == fdb) - break; - - /* We only care for static entries */ - if (!tmp->is_static) - continue; + return err; - dev_uc_del(p->dev, tmp->addr.addr); - } +rollback: + hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) { + /* We only care for static entries */ + if (!tmp->is_static) + continue; + if (tmp == f) + break; + dev_uc_del(p->dev, tmp->key.addr.addr); } - return err; + + goto done; } void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) { - struct net_bridge_fdb_entry *fdb; - int i; + struct net_bridge_fdb_entry *f; ASSERT_RTNL(); - for (i = 0; i < BR_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) { - /* We only care for static entries */ - if (!fdb->is_static) - continue; + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + /* We only care for static entries */ + if (!f->is_static) + continue; - dev_uc_del(p->dev, fdb->addr.addr); - } + dev_uc_del(p->dev, f->key.addr.addr); } + rcu_read_unlock(); } int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { struct net_bridge_fdb_entry *fdb; - struct hlist_head *head; bool modified = false; int err = 0; @@ -1078,10 +1053,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, spin_lock_bh(&br->hash_lock); - head = &br->hash[br_mac_hash(addr, vid)]; fdb = br_fdb_find(br, addr, vid); if (!fdb) { - fdb = fdb_create(head, p, addr, vid, 0, 0); + fdb = fdb_create(br, p, addr, vid, 0, 0); if (!fdb) { err = -ENOMEM; goto err_unlock; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index b0f4c734900b..6d9f48bd374a 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -760,9 +760,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void br_mdb_init(void) { - rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); - rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); } void br_mdb_uninit(void) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index c2eea1b8737a..27f1d4f2114a 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -991,7 +991,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, unsigned int i; int ret; - e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); + e = rcu_dereference(net->nf.hooks_bridge[hook]); if (!e) return okfn(net, sk, skb); diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 20cbb727df4d..8e2d7cfa4e16 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -78,7 +78,6 @@ void br_netfilter_rtable_init(struct net_bridge *br) atomic_set(&rt->dst.__refcnt, 1); rt->dst.dev = br->dev; - rt->dst.path = &rt->dst; dst_init_metrics(&rt->dst, br_dst_default_metrics, true); rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; rt->dst.ops = &fake_dst_ops; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1312b8d20ec3..8e13a64d8c99 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -168,12 +168,17 @@ struct net_bridge_vlan_group { u16 pvid; }; +struct net_bridge_fdb_key { + mac_addr addr; + u16 vlan_id; +}; + struct net_bridge_fdb_entry { - struct hlist_node hlist; + struct rhash_head rhnode; struct net_bridge_port *dst; - mac_addr addr; - __u16 vlan_id; + struct net_bridge_fdb_key key; + struct hlist_node fdb_node; unsigned char is_local:1, is_static:1, added_by_user:1, @@ -315,7 +320,7 @@ struct net_bridge { struct net_bridge_vlan_group __rcu *vlgrp; #endif - struct hlist_head hash[BR_HASH_SIZE]; + struct rhashtable fdb_hash_tbl; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) union { struct rtable fake_rtable; @@ -405,6 +410,7 @@ struct net_bridge { int offload_fwd_mark; #endif bool neigh_suppress_enabled; + struct hlist_head fdb_list; }; struct br_input_skb_cb { @@ -515,6 +521,8 @@ static inline void br_netpoll_disable(struct net_bridge_port *p) /* br_fdb.c */ int br_fdb_init(void); void br_fdb_fini(void); +int br_fdb_hash_init(struct net_bridge *br); +void br_fdb_hash_fini(struct net_bridge *br); void br_fdb_flush(struct net_bridge *br); void br_fdb_find_delete_local(struct net_bridge *br, const struct net_bridge_port *p, @@ -752,7 +760,7 @@ static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst, static inline bool br_multicast_is_router(struct net_bridge *br) { - return 0; + return false; } static inline bool br_multicast_querier_exists(struct net_bridge *br, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 9700e0f3307b..ee775f4ff76c 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -121,13 +121,13 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) switch (type) { case RTM_DELNEIGH: - br_switchdev_fdb_call_notifiers(false, fdb->addr.addr, - fdb->vlan_id, + br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr, + fdb->key.vlan_id, fdb->dst->dev); break; case RTM_NEWNEIGH: - br_switchdev_fdb_call_notifiers(true, fdb->addr.addr, - fdb->vlan_id, + br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr, + fdb->key.vlan_id, fdb->dst->dev); break; } diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 723f25eed8ea..b1be0dcfba6b 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -272,10 +272,7 @@ static ssize_t group_addr_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%x:%x:%x:%x:%x:%x\n", - br->group_addr[0], br->group_addr[1], - br->group_addr[2], br->group_addr[3], - br->group_addr[4], br->group_addr[5]); + return sprintf(buf, "%pM\n", br->group_addr); } static ssize_t group_addr_store(struct device *d, @@ -284,14 +281,11 @@ static ssize_t group_addr_store(struct device *d, { struct net_bridge *br = to_bridge(d); u8 new_addr[6]; - int i; if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", - &new_addr[0], &new_addr[1], &new_addr[2], - &new_addr[3], &new_addr[4], &new_addr[5]) != 6) + if (!mac_pton(buf, new_addr)) return -EINVAL; if (!is_link_local_ether_addr(new_addr)) @@ -306,8 +300,7 @@ static ssize_t group_addr_store(struct device *d, return restart_syscall(); spin_lock_bh(&br->lock); - for (i = 0; i < 6; i++) - br->group_addr[i] = new_addr[i]; + ether_addr_copy(br->group_addr, new_addr); spin_unlock_bh(&br->lock); br->group_addr_set = true; diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index e7ef1a1ef3a6..225d1668dfdd 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -4,6 +4,7 @@ # menuconfig NF_TABLES_BRIDGE depends on BRIDGE && NETFILTER && NF_TABLES + select NETFILTER_FAMILY_BRIDGE tristate "Ethernet Bridge nf_tables support" if NF_TABLES_BRIDGE @@ -29,6 +30,7 @@ endif # NF_TABLES_BRIDGE menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" depends on BRIDGE && NETFILTER && NETFILTER_XTABLES + select NETFILTER_FAMILY_BRIDGE help ebtables is a general, extensible frame/packet identification framework. Say 'Y' or 'M' here if you want to do Ethernet diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 37817d25b63d..02c4b409d317 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2445,7 +2445,6 @@ static int __init ebtables_init(void) return ret; } - printk(KERN_INFO "Ebtables v2.0 registered\n"); return 0; } @@ -2453,7 +2452,6 @@ static void __exit ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); xt_unregister_target(&ebt_standard_target); - printk(KERN_INFO "Ebtables v2.0 unregistered\n"); } EXPORT_SYMBOL(ebt_register_table); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 97afdc0744e6..5160cf614176 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -25,63 +25,23 @@ nft_do_chain_bridge(void *priv, { struct nft_pktinfo pkt; + nft_set_pktinfo(&pkt, skb, state); + switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_set_pktinfo_ipv4_validate(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb); break; case htons(ETH_P_IPV6): - nft_set_pktinfo_ipv6_validate(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb); break; default: - nft_set_pktinfo_unspec(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb); break; } return nft_do_chain(&pkt, priv); } -static struct nft_af_info nft_af_bridge __read_mostly = { - .family = NFPROTO_BRIDGE, - .nhooks = NF_BR_NUMHOOKS, - .owner = THIS_MODULE, - .nops = 1, - .hooks = { - [NF_BR_PRE_ROUTING] = nft_do_chain_bridge, - [NF_BR_LOCAL_IN] = nft_do_chain_bridge, - [NF_BR_FORWARD] = nft_do_chain_bridge, - [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, - [NF_BR_POST_ROUTING] = nft_do_chain_bridge, - }, -}; - -static int nf_tables_bridge_init_net(struct net *net) -{ - net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); - if (net->nft.bridge == NULL) - return -ENOMEM; - - memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge)); - - if (nft_register_afinfo(net, net->nft.bridge) < 0) - goto err; - - return 0; -err: - kfree(net->nft.bridge); - return -ENOMEM; -} - -static void nf_tables_bridge_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.bridge); - kfree(net->nft.bridge); -} - -static struct pernet_operations nf_tables_bridge_net_ops = { - .init = nf_tables_bridge_init_net, - .exit = nf_tables_bridge_exit_net, -}; - static const struct nf_chain_type filter_bridge = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, @@ -92,75 +52,23 @@ static const struct nf_chain_type filter_bridge = { (1 << NF_BR_FORWARD) | (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_POST_ROUTING), -}; - -static void nf_br_saveroute(const struct sk_buff *skb, - struct nf_queue_entry *entry) -{ -} - -static int nf_br_reroute(struct net *net, struct sk_buff *skb, - const struct nf_queue_entry *entry) -{ - return 0; -} - -static __sum16 nf_br_checksum(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, u_int8_t protocol) -{ - return 0; -} - -static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, unsigned int len, - u_int8_t protocol) -{ - return 0; -} - -static int nf_br_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict __always_unused) -{ - return 0; -} - -static const struct nf_afinfo nf_br_afinfo = { - .family = AF_BRIDGE, - .checksum = nf_br_checksum, - .checksum_partial = nf_br_checksum_partial, - .route = nf_br_route, - .saveroute = nf_br_saveroute, - .reroute = nf_br_reroute, - .route_key_size = 0, + .hooks = { + [NF_BR_PRE_ROUTING] = nft_do_chain_bridge, + [NF_BR_LOCAL_IN] = nft_do_chain_bridge, + [NF_BR_FORWARD] = nft_do_chain_bridge, + [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, + [NF_BR_POST_ROUTING] = nft_do_chain_bridge, + }, }; static int __init nf_tables_bridge_init(void) { - int ret; - - nf_register_afinfo(&nf_br_afinfo); - ret = nft_register_chain_type(&filter_bridge); - if (ret < 0) - goto err1; - - ret = register_pernet_subsys(&nf_tables_bridge_net_ops); - if (ret < 0) - goto err2; - - return ret; - -err2: - nft_unregister_chain_type(&filter_bridge); -err1: - nf_unregister_afinfo(&nf_br_afinfo); - return ret; + return nft_register_chain_type(&filter_bridge); } static void __exit nf_tables_bridge_exit(void) { - unregister_pernet_subsys(&nf_tables_bridge_net_ops); nft_unregister_chain_type(&filter_bridge); - nf_unregister_afinfo(&nf_br_afinfo); } module_init(nf_tables_bridge_init); @@ -168,4 +76,4 @@ module_exit(nf_tables_bridge_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE); +MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter"); diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 655ed7032150..a1e85f032108 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -352,15 +352,14 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) u8 cmdrsp; u8 cmd; int ret = -1; - u16 tmp16; u8 len; u8 param[255]; - u8 linkid; + u8 linkid = 0; struct cfctrl *cfctrl = container_obj(layer); struct cfctrl_request_info rsp, *req; - cfpkt_extr_head(pkt, &cmdrsp, 1); + cmdrsp = cfpkt_extr_head_u8(pkt); cmd = cmdrsp & CFCTRL_CMD_MASK; if (cmd != CFCTRL_CMD_LINK_ERR && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp) @@ -378,13 +377,12 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) u8 physlinkid; u8 prio; u8 tmp; - u32 tmp32; u8 *cp; int i; struct cfctrl_link_param linkparam; memset(&linkparam, 0, sizeof(linkparam)); - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); serv = tmp & CFCTRL_SRV_MASK; linkparam.linktype = serv; @@ -392,13 +390,13 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) servtype = tmp >> 4; linkparam.chtype = servtype; - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); physlinkid = tmp & 0x07; prio = tmp >> 3; linkparam.priority = prio; linkparam.phyid = physlinkid; - cfpkt_extr_head(pkt, &endpoint, 1); + endpoint = cfpkt_extr_head_u8(pkt); linkparam.endpoint = endpoint & 0x03; switch (serv) { @@ -407,45 +405,43 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_VIDEO: - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); linkparam.u.video.connid = tmp; if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_DATAGRAM: - cfpkt_extr_head(pkt, &tmp32, 4); linkparam.u.datagram.connid = - le32_to_cpu(tmp32); + cfpkt_extr_head_u32(pkt); if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_RFM: /* Construct a frame, convert * DatagramConnectionID * to network format long and copy it out... */ - cfpkt_extr_head(pkt, &tmp32, 4); linkparam.u.rfm.connid = - le32_to_cpu(tmp32); + cfpkt_extr_head_u32(pkt); cp = (u8 *) linkparam.u.rfm.volume; - for (cfpkt_extr_head(pkt, &tmp, 1); + for (tmp = cfpkt_extr_head_u8(pkt); cfpkt_more(pkt) && tmp != '\0'; - cfpkt_extr_head(pkt, &tmp, 1)) + tmp = cfpkt_extr_head_u8(pkt)) *cp++ = tmp; *cp = '\0'; if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_UTIL: @@ -454,13 +450,11 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) * to network format long and copy it out... */ /* Fifosize KB */ - cfpkt_extr_head(pkt, &tmp16, 2); linkparam.u.utility.fifosize_kb = - le16_to_cpu(tmp16); + cfpkt_extr_head_u16(pkt); /* Fifosize bufs */ - cfpkt_extr_head(pkt, &tmp16, 2); linkparam.u.utility.fifosize_bufs = - le16_to_cpu(tmp16); + cfpkt_extr_head_u16(pkt); /* name */ cp = (u8 *) linkparam.u.utility.name; caif_assert(sizeof(linkparam.u.utility.name) @@ -468,24 +462,24 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) { - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); *cp++ = tmp; } /* Length */ - cfpkt_extr_head(pkt, &len, 1); + len = cfpkt_extr_head_u8(pkt); linkparam.u.utility.paramlen = len; /* Param Data */ cp = linkparam.u.utility.params; while (cfpkt_more(pkt) && len--) { - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); *cp++ = tmp; } if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); /* Length */ - cfpkt_extr_head(pkt, &len, 1); + len = cfpkt_extr_head_u8(pkt); /* Param Data */ cfpkt_extr_head(pkt, ¶m, len); break; @@ -522,7 +516,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) } break; case CFCTRL_CMD_LINK_DESTROY: - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); break; case CFCTRL_CMD_LINK_ERR: diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 71b6ab240dea..38c2b7a890dd 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -8,7 +8,6 @@ #include <linux/string.h> #include <linux/skbuff.h> -#include <linux/hardirq.h> #include <linux/export.h> #include <net/caif/cfpkt.h> diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 922ac1d605b3..53ecda10b790 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ #include <linux/fs.h> -#include <linux/hardirq.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netdevice.h> diff --git a/net/can/Kconfig b/net/can/Kconfig index a15c0e0d1fc7..a4399be54ff4 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -11,7 +11,7 @@ menuconfig CAN 1991, mainly for automotive, but now widely used in marine (NMEA2000), industrial, and medical applications. More information on the CAN network protocol family PF_CAN - is contained in <Documentation/networking/can.txt>. + is contained in <Documentation/networking/can.rst>. If you want CAN support you should say Y here and also to the specific driver for your controller(s) below. diff --git a/net/can/af_can.c b/net/can/af_can.c index 4d7f988a3130..6da324550eec 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -321,13 +321,13 @@ EXPORT_SYMBOL(can_send); * af_can rx path */ -static struct dev_rcv_lists *find_dev_rcv_lists(struct net *net, +static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net, struct net_device *dev) { if (!dev) return net->can.can_rx_alldev_list; else - return (struct dev_rcv_lists *)dev->ml_priv; + return (struct can_dev_rcv_lists *)dev->ml_priv; } /** @@ -381,7 +381,7 @@ static unsigned int effhash(canid_t can_id) * Reduced can_id to have a preprocessed filter compare value. */ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, - struct dev_rcv_lists *d) + struct can_dev_rcv_lists *d) { canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */ @@ -464,7 +464,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, { struct receiver *r; struct hlist_head *rl; - struct dev_rcv_lists *d; + struct can_dev_rcv_lists *d; struct s_pstats *can_pstats = net->can.can_pstats; int err = 0; @@ -542,7 +542,7 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, struct receiver *r = NULL; struct hlist_head *rl; struct s_pstats *can_pstats = net->can.can_pstats; - struct dev_rcv_lists *d; + struct can_dev_rcv_lists *d; if (dev && dev->type != ARPHRD_CAN) return; @@ -615,7 +615,7 @@ static inline void deliver(struct sk_buff *skb, struct receiver *r) r->matches++; } -static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) +static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) { struct receiver *r; int matches = 0; @@ -682,7 +682,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) static void can_receive(struct sk_buff *skb, struct net_device *dev) { - struct dev_rcv_lists *d; + struct can_dev_rcv_lists *d; struct net *net = dev_net(dev); struct s_stats *can_stats = net->can.can_stats; int matches; @@ -821,7 +821,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct dev_rcv_lists *d; + struct can_dev_rcv_lists *d; if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; @@ -866,7 +866,7 @@ static int can_pernet_init(struct net *net) { spin_lock_init(&net->can.can_rcvlists_lock); net->can.can_rx_alldev_list = - kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL); + kzalloc(sizeof(struct can_dev_rcv_lists), GFP_KERNEL); if (!net->can.can_rx_alldev_list) goto out; net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL); @@ -912,7 +912,7 @@ static void can_pernet_exit(struct net *net) rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { - struct dev_rcv_lists *d = dev->ml_priv; + struct can_dev_rcv_lists *d = dev->ml_priv; BUG_ON(d->entries); kfree(d); diff --git a/net/can/af_can.h b/net/can/af_can.h index eca6463c6213..9cb3719632bd 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -67,7 +67,7 @@ struct receiver { enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX }; /* per device receive filters linked at dev->ml_priv */ -struct dev_rcv_lists { +struct can_dev_rcv_lists { struct hlist_head rx[RX_MAX]; struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ]; struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ]; diff --git a/net/can/bcm.c b/net/can/bcm.c index 13690334efa3..ac5e5e34fee3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -246,7 +246,6 @@ static int bcm_proc_open(struct inode *inode, struct file *file) } static const struct file_operations bcm_proc_fops = { - .owner = THIS_MODULE, .open = bcm_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/can/gw.c b/net/can/gw.c index 73a02af4b5d7..398dd0395ad9 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1014,6 +1014,8 @@ static struct pernet_operations cangw_pernet_ops = { static __init int cgw_module_init(void) { + int ret; + /* sanitize given module parameter */ max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS); @@ -1031,15 +1033,19 @@ static __init int cgw_module_init(void) notifier.notifier_call = cgw_notifier; register_netdevice_notifier(¬ifier); - if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) { + ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE, + NULL, cgw_dump_jobs, 0); + if (ret) { unregister_netdevice_notifier(¬ifier); kmem_cache_destroy(cgw_cache); return -ENOBUFS; } - /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0); - __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0); + /* Only the first call to rtnl_register_module can fail */ + rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE, + cgw_create_job, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE, + cgw_remove_job, NULL, 0); return 0; } diff --git a/net/can/proc.c b/net/can/proc.c index 0c59f876fe6f..fdf704e9bb8c 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -276,7 +276,6 @@ static int can_stats_proc_open(struct inode *inode, struct file *file) } static const struct file_operations can_stats_proc_fops = { - .owner = THIS_MODULE, .open = can_stats_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -310,7 +309,6 @@ static int can_reset_stats_proc_open(struct inode *inode, struct file *file) } static const struct file_operations can_reset_stats_proc_fops = { - .owner = THIS_MODULE, .open = can_reset_stats_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -329,7 +327,6 @@ static int can_version_proc_open(struct inode *inode, struct file *file) } static const struct file_operations can_version_proc_fops = { - .owner = THIS_MODULE, .open = can_version_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -338,7 +335,7 @@ static const struct file_operations can_version_proc_fops = { static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, struct net_device *dev, - struct dev_rcv_lists *d) + struct can_dev_rcv_lists *d) { if (!hlist_empty(&d->rx[idx])) { can_print_recv_banner(m); @@ -353,7 +350,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) /* double cast to prevent GCC warning */ int idx = (int)(long)PDE_DATA(m->file->f_inode); struct net_device *dev; - struct dev_rcv_lists *d; + struct can_dev_rcv_lists *d; struct net *net = m->private; seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]); @@ -382,7 +379,6 @@ static int can_rcvlist_proc_open(struct inode *inode, struct file *file) } static const struct file_operations can_rcvlist_proc_fops = { - .owner = THIS_MODULE, .open = can_rcvlist_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -417,7 +413,7 @@ static inline void can_rcvlist_proc_show_array(struct seq_file *m, static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; - struct dev_rcv_lists *d; + struct can_dev_rcv_lists *d; struct net *net = m->private; /* RX_SFF */ @@ -450,7 +446,6 @@ static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file) } static const struct file_operations can_rcvlist_sff_proc_fops = { - .owner = THIS_MODULE, .open = can_rcvlist_sff_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -461,7 +456,7 @@ static const struct file_operations can_rcvlist_sff_proc_fops = { static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; - struct dev_rcv_lists *d; + struct can_dev_rcv_lists *d; struct net *net = m->private; /* RX_EFF */ @@ -494,7 +489,6 @@ static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file) } static const struct file_operations can_rcvlist_eff_proc_fops = { - .owner = THIS_MODULE, .open = can_rcvlist_eff_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/can/raw.c b/net/can/raw.c index 864c80dbdb72..f2ecc43376a1 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -401,6 +401,8 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (len < sizeof(*addr)) return -EINVAL; + if (addr->can_family != AF_CAN) + return -EINVAL; lock_sock(sk); diff --git a/net/core/Makefile b/net/core/Makefile index 1fd0a9c88b1b..6dbbba8c57ae 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ - fib_notifier.o + fib_notifier.o xdp.o obj-y += net-sysfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o diff --git a/net/core/dev.c b/net/core/dev.c index 613fb4066be7..dda9d7b9a840 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1554,6 +1554,23 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +/** + * dev_disable_gro_hw - disable HW Generic Receive Offload on a device + * @dev: device + * + * Disable HW Generic Receive Offload (GRO_HW) on a net device. Must be + * called under RTNL. This is needed if Generic XDP is installed on + * the device. + */ +static void dev_disable_gro_hw(struct net_device *dev) +{ + dev->wanted_features &= ~NETIF_F_GRO_HW; + netdev_update_features(dev); + + if (unlikely(dev->features & NETIF_F_GRO_HW)) + netdev_WARN(dev, "failed to disable GRO_HW!\n"); +} + static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, struct net_device *dev) { @@ -1677,7 +1694,6 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); /** * call_netdevice_notifiers_info - call all network notifier blocks * @val: value passed unmodified to notifier function - * @dev: net_device pointer passed unmodified to notifier function * @info: notifier information data * * Call all network notifier blocks. Parameters and return value @@ -2815,7 +2831,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, segs = skb_mac_gso_segment(skb, features); - if (unlikely(skb_needs_check(skb, tx_path))) + if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) skb_warn_bad_offload(skb); return segs; @@ -3054,7 +3070,7 @@ int skb_csum_hwoffload_help(struct sk_buff *skb, } EXPORT_SYMBOL(skb_csum_hwoffload_help); -static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again) { netdev_features_t features; @@ -3078,9 +3094,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device __skb_linearize(skb)) goto out_kfree_skb; - if (validate_xmit_xfrm(skb, features)) - goto out_kfree_skb; - /* If packet is not checksummed and device does not * support checksumming for this protocol, complete * checksumming here. @@ -3097,6 +3110,8 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device } } + skb = validate_xmit_xfrm(skb, features, again); + return skb; out_kfree_skb: @@ -3106,7 +3121,7 @@ out_null: return NULL; } -struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev) +struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again) { struct sk_buff *next, *head = NULL, *tail; @@ -3117,7 +3132,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d /* in case skb wont be segmented, point to itself */ skb->prev = skb; - skb = validate_xmit_skb(skb, dev); + skb = validate_xmit_skb(skb, dev, again); if (!skb) continue; @@ -3185,6 +3200,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, int rc; qdisc_calculate_pkt_len(skb, q); + + if (q->flags & TCQ_F_NOLOCK) { + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { + __qdisc_drop(skb, &to_free); + rc = NET_XMIT_DROP; + } else { + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + __qdisc_run(q); + } + + if (unlikely(to_free)) + kfree_skb_list(to_free); + return rc; + } + /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. @@ -3215,9 +3245,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, contended = false; } __qdisc_run(q); - } else - qdisc_run_end(q); + } + qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; @@ -3227,6 +3257,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, contended = false; } __qdisc_run(q); + qdisc_run_end(q); } } spin_unlock(root_lock); @@ -3399,8 +3430,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, else queue_index = __netdev_pick_tx(dev, skb); - if (!accel_priv) - queue_index = netdev_cap_txqueue(dev, queue_index); + queue_index = netdev_cap_txqueue(dev, queue_index); } skb_set_queue_mapping(skb, queue_index); @@ -3439,6 +3469,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) struct netdev_queue *txq; struct Qdisc *q; int rc = -ENOMEM; + bool again = false; skb_reset_mac_header(skb); @@ -3500,7 +3531,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) XMIT_RECURSION_LIMIT)) goto recursion_alert; - skb = validate_xmit_skb(skb, dev); + skb = validate_xmit_skb(skb, dev, &again); if (!skb) goto out; @@ -3896,9 +3927,33 @@ drop: return NET_RX_DROP; } +static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct netdev_rx_queue *rxqueue; + + rxqueue = dev->_rx; + + if (skb_rx_queue_recorded(skb)) { + u16 index = skb_get_rx_queue(skb); + + if (unlikely(index >= dev->real_num_rx_queues)) { + WARN_ONCE(dev->real_num_rx_queues > 1, + "%s received packet on queue %u, but number " + "of RX queues is %u\n", + dev->name, index, dev->real_num_rx_queues); + + return rxqueue; /* Return first rxqueue */ + } + rxqueue += index; + } + return rxqueue; +} + static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct bpf_prog *xdp_prog) { + struct netdev_rx_queue *rxqueue; u32 metalen, act = XDP_DROP; struct xdp_buff xdp; void *orig_data; @@ -3942,6 +3997,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, xdp.data_hard_start = skb->data - skb_headroom(skb); orig_data = xdp.data; + rxqueue = netif_get_rxqueue(skb); + xdp.rxq = &rxqueue->xdp_rxq; + act = bpf_prog_run_xdp(xdp_prog, &xdp); off = xdp.data - orig_data; @@ -4166,21 +4224,26 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) while (head) { struct Qdisc *q = head; - spinlock_t *root_lock; + spinlock_t *root_lock = NULL; head = head->next_sched; - root_lock = qdisc_lock(q); - spin_lock(root_lock); + if (!(q->flags & TCQ_F_NOLOCK)) { + root_lock = qdisc_lock(q); + spin_lock(root_lock); + } /* We need to make sure head->next_sched is read * before clearing __QDISC_STATE_SCHED */ smp_mb__before_atomic(); clear_bit(__QDISC_STATE_SCHED, &q->state); qdisc_run(q); - spin_unlock(root_lock); + if (root_lock) + spin_unlock(root_lock); } } + + xfrm_dev_backlog(sd); } #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE) @@ -4568,6 +4631,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) } else if (new && !old) { static_key_slow_inc(&generic_xdp_needed); dev_disable_lro(dev); + dev_disable_gro_hw(dev); } break; @@ -6371,6 +6435,7 @@ rollback: * netdev_upper_dev_link - Add a link to the upper device * @dev: device * @upper_dev: new upper device + * @extack: netlink extended ack * * Adds a link to device which is upper to this one. The caller must hold * the RTNL lock. On a failure a negative errno code is returned. @@ -6392,6 +6457,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * @upper_dev: new upper device * @upper_priv: upper device private * @upper_info: upper info to be passed down via notifier + * @extack: netlink extended ack * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices @@ -6982,6 +7048,35 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) EXPORT_SYMBOL(dev_set_mtu); /** + * dev_change_tx_queue_len - Change TX queue length of a netdevice + * @dev: device + * @new_len: new tx queue length + */ +int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) +{ + unsigned int orig_len = dev->tx_queue_len; + int res; + + if (new_len != (unsigned int)new_len) + return -ERANGE; + + if (new_len != orig_len) { + dev->tx_queue_len = new_len; + res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); + res = notifier_to_errno(res); + if (res) { + netdev_err(dev, + "refused to change device tx_queue_len\n"); + dev->tx_queue_len = orig_len; + return res; + } + return dev_qdisc_change_tx_queue_len(dev); + } + + return 0; +} + +/** * dev_set_group - Change group this device belongs to * @dev: device * @new_group: group this device should belong to @@ -7096,17 +7191,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); -u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id) +void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, + struct netdev_bpf *xdp) { - struct netdev_bpf xdp; - - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; + memset(xdp, 0, sizeof(*xdp)); + xdp->command = XDP_QUERY_PROG; /* Query must always succeed. */ - WARN_ON(bpf_op(dev, &xdp) < 0); - if (prog_id) - *prog_id = xdp.prog_id; + WARN_ON(bpf_op(dev, xdp) < 0); +} + +static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op) +{ + struct netdev_bpf xdp; + + __dev_xdp_query(dev, bpf_op, &xdp); return xdp.prog_attached; } @@ -7129,6 +7228,27 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, return bpf_op(dev, &xdp); } +static void dev_xdp_uninstall(struct net_device *dev) +{ + struct netdev_bpf xdp; + bpf_op_t ndo_bpf; + + /* Remove generic XDP */ + WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL)); + + /* Remove from the driver */ + ndo_bpf = dev->netdev_ops->ndo_bpf; + if (!ndo_bpf) + return; + + __dev_xdp_query(dev, ndo_bpf, &xdp); + if (xdp.prog_attached == XDP_ATTACHED_NONE) + return; + + /* Program removal should always succeed */ + WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL)); +} + /** * dev_change_xdp_fd - set or clear a bpf program for a device rx path * @dev: device @@ -7157,10 +7277,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_chk = generic_xdp_install; if (fd >= 0) { - if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL)) + if (bpf_chk && __dev_xdp_attached(dev, bpf_chk)) return -EEXIST; if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_attached(dev, bpf_op, NULL)) + __dev_xdp_attached(dev, bpf_op)) return -EBUSY; prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, @@ -7259,6 +7379,7 @@ static void rollback_registered_many(struct list_head *head) /* Shutdown queueing discipline. */ dev_shutdown(dev); + dev_xdp_uninstall(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. @@ -7268,7 +7389,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, - GFP_KERNEL, NULL); + GFP_KERNEL, NULL, 0); /* * Flush the unicast and multicast chains @@ -7402,6 +7523,18 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~dev->gso_partial_features; } + if (!(features & NETIF_F_RXCSUM)) { + /* NETIF_F_GRO_HW implies doing RXCSUM since every packet + * successfully merged by hardware must also have the + * checksum verified by hardware. If the user does not + * want to enable RXCSUM, logically, we should disable GRO_HW. + */ + if (features & NETIF_F_GRO_HW) { + netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n"); + features &= ~NETIF_F_GRO_HW; + } + } + return features; } @@ -7535,12 +7668,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); -#ifdef CONFIG_SYSFS static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; size_t sz = count * sizeof(*rx); + int err = 0; BUG_ON(count < 1); @@ -7550,11 +7683,38 @@ static int netif_alloc_rx_queues(struct net_device *dev) dev->_rx = rx; - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { rx[i].dev = dev; + + /* XDP RX-queue setup */ + err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i); + if (err < 0) + goto err_rxq_info; + } return 0; + +err_rxq_info: + /* Rollback successful reg's and free other resources */ + while (i--) + xdp_rxq_info_unreg(&rx[i].xdp_rxq); + kvfree(dev->_rx); + dev->_rx = NULL; + return err; +} + +static void netif_free_rx_queues(struct net_device *dev) +{ + unsigned int i, count = dev->num_rx_queues; + + /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */ + if (!dev->_rx) + return; + + for (i = 0; i < count; i++) + xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq); + + kvfree(dev->_rx); } -#endif static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, void *_unused) @@ -8115,12 +8275,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } -#ifdef CONFIG_SYSFS if (rxqs < 1) { pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); return NULL; } -#endif alloc_size = sizeof(struct net_device); if (sizeof_priv) { @@ -8177,12 +8335,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (netif_alloc_netdev_queues(dev)) goto free_all; -#ifdef CONFIG_SYSFS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_all; -#endif strcpy(dev->name, name); dev->name_assign_type = name_assign_type; @@ -8218,13 +8374,10 @@ EXPORT_SYMBOL(alloc_netdev_mqs); void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; - struct bpf_prog *prog; might_sleep(); netif_free_tx_queues(dev); -#ifdef CONFIG_SYSFS - kvfree(dev->_rx); -#endif + netif_free_rx_queues(dev); kfree(rcu_dereference_protected(dev->ingress_queue, 1)); @@ -8237,12 +8390,6 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; - prog = rcu_dereference_protected(dev->xdp_prog, 1); - if (prog) { - bpf_prog_put(prog); - static_key_slow_dec(&generic_xdp_needed); - } - /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); @@ -8355,7 +8502,7 @@ EXPORT_SYMBOL(unregister_netdev); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - int err, new_nsid; + int err, new_nsid, new_ifindex; ASSERT_RTNL(); @@ -8411,11 +8558,16 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); - if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) - new_nsid = peernet2id_alloc(dev_net(dev), net); + + new_nsid = peernet2id_alloc(dev_net(dev), net); + /* If there is an ifindex conflict assign a new one */ + if (__dev_get_by_index(net, dev->ifindex)) + new_ifindex = dev_new_index(net); else - new_nsid = peernet2id(dev_net(dev), net); - rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid); + new_ifindex = dev->ifindex; + + rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid, + new_ifindex); /* * Flush the unicast and multicast chains @@ -8429,10 +8581,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Actually switch the network namespace */ dev_net_set(dev, net); - - /* If there is an ifindex conflict assign a new one */ - if (__dev_get_by_index(net, dev->ifindex)) - dev->ifindex = dev_new_index(net); + dev->ifindex = new_ifindex; /* Send a netdev-add uevent to the new namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_ADD); @@ -8830,6 +8979,9 @@ static int __init net_dev_init(void) skb_queue_head_init(&sd->input_pkt_queue); skb_queue_head_init(&sd->process_queue); +#ifdef CONFIG_XFRM_OFFLOAD + skb_queue_head_init(&sd->xfrm_backlog); +#endif INIT_LIST_HEAD(&sd->poll_list); sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7e690d0ccd05..0ab1af04296c 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -18,26 +18,10 @@ * match. --pb */ -static int dev_ifname(struct net *net, struct ifreq __user *arg) +static int dev_ifname(struct net *net, struct ifreq *ifr) { - struct ifreq ifr; - int error; - - /* - * Fetch the caller's info block. - */ - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - ifr.ifr_name[IFNAMSIZ-1] = 0; - - error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex); - if (error) - return error; - - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; + ifr->ifr_name[IFNAMSIZ-1] = 0; + return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex); } static gifconf_func_t *gifconf_list[NPROTO]; @@ -66,9 +50,8 @@ EXPORT_SYMBOL(register_gifconf); * Thus we will need a 'compatibility mode'. */ -static int dev_ifconf(struct net *net, char __user *arg) +int dev_ifconf(struct net *net, struct ifconf *ifc, int size) { - struct ifconf ifc; struct net_device *dev; char __user *pos; int len; @@ -79,11 +62,8 @@ static int dev_ifconf(struct net *net, char __user *arg) * Fetch the caller's info block. */ - if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) - return -EFAULT; - - pos = ifc.ifc_buf; - len = ifc.ifc_len; + pos = ifc->ifc_buf; + len = ifc->ifc_len; /* * Loop over the interfaces, and write an info block for each. @@ -95,10 +75,10 @@ static int dev_ifconf(struct net *net, char __user *arg) if (gifconf_list[i]) { int done; if (!pos) - done = gifconf_list[i](dev, NULL, 0); + done = gifconf_list[i](dev, NULL, 0, size); else done = gifconf_list[i](dev, pos + total, - len - total); + len - total, size); if (done < 0) return -EFAULT; total += done; @@ -109,12 +89,12 @@ static int dev_ifconf(struct net *net, char __user *arg) /* * All done. Write the updated control block back to the caller. */ - ifc.ifc_len = total; + ifc->ifc_len = total; /* * Both BSD and Solaris return 0 here, so we do too. */ - return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; + return 0; } /* @@ -406,53 +386,24 @@ EXPORT_SYMBOL(dev_load); * positive or a negative errno code on error. */ -int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout) { - struct ifreq ifr; int ret; char *colon; - /* One special case: SIOCGIFCONF takes ifconf argument - and requires shared lock, because it sleeps writing - to user space. - */ - - if (cmd == SIOCGIFCONF) { - rtnl_lock(); - ret = dev_ifconf(net, (char __user *) arg); - rtnl_unlock(); - return ret; - } + if (need_copyout) + *need_copyout = true; if (cmd == SIOCGIFNAME) - return dev_ifname(net, (struct ifreq __user *)arg); - - /* - * Take care of Wireless Extensions. Unfortunately struct iwreq - * isn't a proper subset of struct ifreq (it's 8 byte shorter) - * so we need to treat it specially, otherwise applications may - * fault if the struct they're passing happens to land at the - * end of a mapped page. - */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - struct iwreq iwr; - - if (copy_from_user(&iwr, arg, sizeof(iwr))) - return -EFAULT; - - iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0; + return dev_ifname(net, ifr); - return wext_handle_ioctl(net, &iwr, cmd, arg); - } - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; + ifr->ifr_name[IFNAMSIZ-1] = 0; - colon = strchr(ifr.ifr_name, ':'); + colon = strchr(ifr->ifr_name, ':'); if (colon) *colon = 0; + dev_load(net, ifr->ifr_name); + /* * See which interface the caller is talking about. */ @@ -472,31 +423,19 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: - dev_load(net, ifr.ifr_name); rcu_read_lock(); - ret = dev_ifsioc_locked(net, &ifr, cmd); + ret = dev_ifsioc_locked(net, ifr, cmd); rcu_read_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } + if (colon) + *colon = ':'; return ret; case SIOCETHTOOL: - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ethtool(net, &ifr); + ret = dev_ethtool(net, ifr); rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } + if (colon) + *colon = ':'; return ret; /* @@ -510,17 +449,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFNAME: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); + ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } + if (colon) + *colon = ':'; return ret; /* @@ -561,10 +494,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); + ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); + if (need_copyout) + *need_copyout = false; return ret; case SIOCGIFMEM: @@ -584,13 +518,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); + ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); - if (!ret && copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; return ret; } return -ENOTTY; diff --git a/net/core/devlink.c b/net/core/devlink.c index 7d430c1d9c3e..18d385ed8237 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -92,12 +92,6 @@ static LIST_HEAD(devlink_list); */ static DEFINE_MUTEX(devlink_mutex); -/* devlink_port_mutex - * - * Shared lock to guard lists of ports in all devlink devices. - */ -static DEFINE_MUTEX(devlink_port_mutex); - static struct net *devlink_net(const struct devlink *devlink) { return read_pnet(&devlink->_net); @@ -335,15 +329,18 @@ devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb, #define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) #define DEVLINK_NL_FLAG_NEED_PORT BIT(1) #define DEVLINK_NL_FLAG_NEED_SB BIT(2) -#define DEVLINK_NL_FLAG_LOCK_PORTS BIT(3) - /* port is not needed but we need to ensure they don't - * change in the middle of command - */ + +/* The per devlink instance lock is taken by default in the pre-doit + * operation, yet several commands do not require this. The global + * devlink lock is taken and protects from disruption by user-calls. + */ +#define DEVLINK_NL_FLAG_NO_LOCK BIT(3) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink; + int err; mutex_lock(&devlink_mutex); devlink = devlink_get_from_info(info); @@ -351,44 +348,47 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); return PTR_ERR(devlink); } + if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) + mutex_lock(&devlink->lock); if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { info->user_ptr[0] = devlink; } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { struct devlink_port *devlink_port; - mutex_lock(&devlink_port_mutex); devlink_port = devlink_port_get_from_info(devlink, info); if (IS_ERR(devlink_port)) { - mutex_unlock(&devlink_port_mutex); - mutex_unlock(&devlink_mutex); - return PTR_ERR(devlink_port); + err = PTR_ERR(devlink_port); + goto unlock; } info->user_ptr[0] = devlink_port; } - if (ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) { - mutex_lock(&devlink_port_mutex); - } if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) { struct devlink_sb *devlink_sb; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) { - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) - mutex_unlock(&devlink_port_mutex); - mutex_unlock(&devlink_mutex); - return PTR_ERR(devlink_sb); + err = PTR_ERR(devlink_sb); + goto unlock; } info->user_ptr[1] = devlink_sb; } return 0; + +unlock: + if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) + mutex_unlock(&devlink->lock); + mutex_unlock(&devlink_mutex); + return err; } static void devlink_nl_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT || - ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) - mutex_unlock(&devlink_port_mutex); + struct devlink *devlink; + + devlink = devlink_get_from_info(info); + if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) + mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); } @@ -614,10 +614,10 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - mutex_lock(&devlink_port_mutex); list_for_each_entry(devlink, &devlink_list, list) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) continue; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_port, &devlink->port_list, list) { if (idx < start) { idx++; @@ -628,13 +628,15 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err) + if (err) { + mutex_unlock(&devlink->lock); goto out; + } idx++; } + mutex_unlock(&devlink->lock); } out: - mutex_unlock(&devlink_port_mutex); mutex_unlock(&devlink_mutex); cb->args[0] = idx; @@ -801,6 +803,7 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, list_for_each_entry(devlink, &devlink_list, list) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) continue; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (idx < start) { idx++; @@ -811,10 +814,13 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err) + if (err) { + mutex_unlock(&devlink->lock); goto out; + } idx++; } + mutex_unlock(&devlink->lock); } out: mutex_unlock(&devlink_mutex); @@ -935,14 +941,18 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops || !devlink->ops->sb_pool_get) continue; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_pool_get_dumpit(msg, start, &idx, devlink, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) + if (err && err != -EOPNOTSUPP) { + mutex_unlock(&devlink->lock); goto out; + } } + mutex_unlock(&devlink->lock); } out: mutex_unlock(&devlink_mutex); @@ -1123,22 +1133,24 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - mutex_lock(&devlink_port_mutex); list_for_each_entry(devlink, &devlink_list, list) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops || !devlink->ops->sb_port_pool_get) continue; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_port_pool_get_dumpit(msg, start, &idx, devlink, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) + if (err && err != -EOPNOTSUPP) { + mutex_unlock(&devlink->lock); goto out; + } } + mutex_unlock(&devlink->lock); } out: - mutex_unlock(&devlink_port_mutex); mutex_unlock(&devlink_mutex); cb->args[0] = idx; @@ -1347,23 +1359,26 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, int err; mutex_lock(&devlink_mutex); - mutex_lock(&devlink_port_mutex); list_for_each_entry(devlink, &devlink_list, list) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops || !devlink->ops->sb_tc_pool_bind_get) continue; + + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx, devlink, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) + if (err && err != -EOPNOTSUPP) { + mutex_unlock(&devlink->lock); goto out; + } } + mutex_unlock(&devlink->lock); } out: - mutex_unlock(&devlink_port_mutex); mutex_unlock(&devlink_mutex); cb->args[0] = idx; @@ -1679,6 +1694,12 @@ static int devlink_dpipe_table_put(struct sk_buff *skb, table->counters_enabled)) goto nla_put_failure; + if (table->resource_valid) { + nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, + table->resource_id, DEVLINK_ATTR_PAD); + nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS, + table->resource_units, DEVLINK_ATTR_PAD); + } if (devlink_dpipe_matches_put(table, skb)) goto nla_put_failure; @@ -2273,6 +2294,273 @@ static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, counters_enable); } +static struct devlink_resource * +devlink_resource_find(struct devlink *devlink, + struct devlink_resource *resource, u64 resource_id) +{ + struct list_head *resource_list; + + if (resource) + resource_list = &resource->resource_list; + else + resource_list = &devlink->resource_list; + + list_for_each_entry(resource, resource_list, list) { + struct devlink_resource *child_resource; + + if (resource->id == resource_id) + return resource; + + child_resource = devlink_resource_find(devlink, resource, + resource_id); + if (child_resource) + return child_resource; + } + return NULL; +} + +static void +devlink_resource_validate_children(struct devlink_resource *resource) +{ + struct devlink_resource *child_resource; + bool size_valid = true; + u64 parts_size = 0; + + if (list_empty(&resource->resource_list)) + goto out; + + list_for_each_entry(child_resource, &resource->resource_list, list) + parts_size += child_resource->size_new; + + if (parts_size > resource->size) + size_valid = false; +out: + resource->size_valid = size_valid; +} + +static int devlink_nl_cmd_resource_set(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_resource *resource; + u64 resource_id; + u64 size; + int err; + + if (!info->attrs[DEVLINK_ATTR_RESOURCE_ID] || + !info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]) + return -EINVAL; + resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]); + + resource = devlink_resource_find(devlink, NULL, resource_id); + if (!resource) + return -EINVAL; + + if (!resource->resource_ops->size_validate) + return -EINVAL; + + size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]); + err = resource->resource_ops->size_validate(devlink, size, + info->extack); + if (err) + return err; + + resource->size_new = size; + devlink_resource_validate_children(resource); + if (resource->parent) + devlink_resource_validate_children(resource->parent); + return 0; +} + +static void +devlink_resource_size_params_put(struct devlink_resource *resource, + struct sk_buff *skb) +{ + struct devlink_resource_size_params *size_params; + + size_params = resource->size_params; + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN, + size_params->size_granularity, DEVLINK_ATTR_PAD); + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX, + size_params->size_max, DEVLINK_ATTR_PAD); + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN, + size_params->size_min, DEVLINK_ATTR_PAD); + nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit); +} + +static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, + struct devlink_resource *resource) +{ + struct devlink_resource *child_resource; + struct nlattr *child_resource_attr; + struct nlattr *resource_attr; + + resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE); + if (!resource_attr) + return -EMSGSIZE; + + if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size, + DEVLINK_ATTR_PAD) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id, + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (resource->size != resource->size_new) + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW, + resource->size_new, DEVLINK_ATTR_PAD); + if (resource->resource_ops && resource->resource_ops->occ_get) + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC, + resource->resource_ops->occ_get(devlink), + DEVLINK_ATTR_PAD); + devlink_resource_size_params_put(resource, skb); + if (list_empty(&resource->resource_list)) + goto out; + + if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID, + resource->size_valid)) + goto nla_put_failure; + + child_resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST); + if (!child_resource_attr) + goto nla_put_failure; + + list_for_each_entry(child_resource, &resource->resource_list, list) { + if (devlink_resource_put(devlink, skb, child_resource)) + goto resource_put_failure; + } + + nla_nest_end(skb, child_resource_attr); +out: + nla_nest_end(skb, resource_attr); + return 0; + +resource_put_failure: + nla_nest_cancel(skb, child_resource_attr); +nla_put_failure: + nla_nest_cancel(skb, resource_attr); + return -EMSGSIZE; +} + +static int devlink_resource_fill(struct genl_info *info, + enum devlink_command cmd, int flags) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_resource *resource; + struct nlattr *resources_attr; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + bool incomplete; + void *hdr; + int i; + int err; + + resource = list_first_entry(&devlink->resource_list, + struct devlink_resource, list); +start_again: + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + return err; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) { + nlmsg_free(skb); + return -EMSGSIZE; + } + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + + resources_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST); + if (!resources_attr) + goto nla_put_failure; + + incomplete = false; + i = 0; + list_for_each_entry_from(resource, &devlink->resource_list, list) { + err = devlink_resource_put(devlink, skb, resource); + if (err) { + if (!i) + goto err_resource_put; + incomplete = true; + break; + } + i++; + } + nla_nest_end(skb, resources_attr); + genlmsg_end(skb, hdr); + if (incomplete) + goto start_again; +send_done: + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + return genlmsg_reply(skb, info); + +nla_put_failure: + err = -EMSGSIZE; +err_resource_put: +err_skb_send_alloc: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int devlink_nl_cmd_resource_dump(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + + if (list_empty(&devlink->resource_list)) + return -EOPNOTSUPP; + + return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0); +} + +static int +devlink_resources_validate(struct devlink *devlink, + struct devlink_resource *resource, + struct genl_info *info) +{ + struct list_head *resource_list; + int err = 0; + + if (resource) + resource_list = &resource->resource_list; + else + resource_list = &devlink->resource_list; + + list_for_each_entry(resource, resource_list, list) { + if (!resource->size_valid) + return -EINVAL; + err = devlink_resources_validate(devlink, resource, info); + if (err) + return err; + } + return err; +} + +static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + int err; + + if (!devlink->ops->reload) + return -EOPNOTSUPP; + + err = devlink_resources_validate(devlink, NULL, info); + if (err) { + NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed"); + return err; + } + return devlink->ops->reload(devlink); +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -2291,6 +2579,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, + [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64}, + [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64}, }; static const struct genl_ops devlink_nl_ops[] = { @@ -2322,14 +2612,16 @@ static const struct genl_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_port_split_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PORT_UNSPLIT, .doit = devlink_nl_cmd_port_unsplit_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_SB_GET, @@ -2397,8 +2689,7 @@ static const struct genl_ops devlink_nl_ops[] = { .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB | - DEVLINK_NL_FLAG_LOCK_PORTS, + DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, @@ -2406,8 +2697,7 @@ static const struct genl_ops devlink_nl_ops[] = { .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB | - DEVLINK_NL_FLAG_LOCK_PORTS, + DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_ESWITCH_GET, @@ -2451,6 +2741,28 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_RESOURCE_SET, + .doit = devlink_nl_cmd_resource_set, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_RESOURCE_DUMP, + .doit = devlink_nl_cmd_resource_dump, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_RELOAD, + .doit = devlink_nl_cmd_reload, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -2488,6 +2800,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->sb_list); INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); + INIT_LIST_HEAD(&devlink->resource_list); + mutex_init(&devlink->lock); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -2550,16 +2864,16 @@ int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, unsigned int port_index) { - mutex_lock(&devlink_port_mutex); + mutex_lock(&devlink->lock); if (devlink_port_index_exists(devlink, port_index)) { - mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink->lock); return -EEXIST; } devlink_port->devlink = devlink; devlink_port->index = port_index; devlink_port->registered = true; list_add_tail(&devlink_port->list, &devlink->port_list); - mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink->lock); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); return 0; } @@ -2572,10 +2886,12 @@ EXPORT_SYMBOL_GPL(devlink_port_register); */ void devlink_port_unregister(struct devlink_port *devlink_port) { + struct devlink *devlink = devlink_port->devlink; + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); - mutex_lock(&devlink_port_mutex); + mutex_lock(&devlink->lock); list_del(&devlink_port->list); - mutex_unlock(&devlink_port_mutex); + mutex_unlock(&devlink->lock); } EXPORT_SYMBOL_GPL(devlink_port_unregister); @@ -2651,7 +2967,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, struct devlink_sb *devlink_sb; int err = 0; - mutex_lock(&devlink_mutex); + mutex_lock(&devlink->lock); if (devlink_sb_index_exists(devlink, sb_index)) { err = -EEXIST; goto unlock; @@ -2670,7 +2986,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, devlink_sb->egress_tc_count = egress_tc_count; list_add_tail(&devlink_sb->list, &devlink->sb_list); unlock: - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink->lock); return err; } EXPORT_SYMBOL_GPL(devlink_sb_register); @@ -2679,11 +2995,11 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) { struct devlink_sb *devlink_sb; - mutex_lock(&devlink_mutex); + mutex_lock(&devlink->lock); devlink_sb = devlink_sb_get_by_index(devlink, sb_index); WARN_ON(!devlink_sb); list_del(&devlink_sb->list); - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink->lock); kfree(devlink_sb); } EXPORT_SYMBOL_GPL(devlink_sb_unregister); @@ -2699,9 +3015,9 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister); int devlink_dpipe_headers_register(struct devlink *devlink, struct devlink_dpipe_headers *dpipe_headers) { - mutex_lock(&devlink_mutex); + mutex_lock(&devlink->lock); devlink->dpipe_headers = dpipe_headers; - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink->lock); return 0; } EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); @@ -2715,9 +3031,9 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); */ void devlink_dpipe_headers_unregister(struct devlink *devlink) { - mutex_lock(&devlink_mutex); + mutex_lock(&devlink->lock); devlink->dpipe_headers = NULL; - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink->lock); } EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister); @@ -2783,9 +3099,9 @@ int devlink_dpipe_table_register(struct devlink *devlink, table->priv = priv; table->counter_control_extern = counter_control_extern; - mutex_lock(&devlink_mutex); + mutex_lock(&devlink->lock); list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink->lock); return 0; } EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); @@ -2801,20 +3117,182 @@ void devlink_dpipe_table_unregister(struct devlink *devlink, { struct devlink_dpipe_table *table; - mutex_lock(&devlink_mutex); + mutex_lock(&devlink->lock); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name); if (!table) goto unlock; list_del_rcu(&table->list); - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink->lock); kfree_rcu(table, rcu); return; unlock: - mutex_unlock(&devlink_mutex); + mutex_unlock(&devlink->lock); } EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); +/** + * devlink_resource_register - devlink resource register + * + * @devlink: devlink + * @resource_name: resource's name + * @top_hierarchy: top hierarchy + * @reload_required: reload is required for new configuration to + * apply + * @resource_size: resource's size + * @resource_id: resource's id + * @parent_reosurce_id: resource's parent id + * @size params: size parameters + * @resource_ops: resource ops + */ +int devlink_resource_register(struct devlink *devlink, + const char *resource_name, + bool top_hierarchy, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + struct devlink_resource_size_params *size_params, + const struct devlink_resource_ops *resource_ops) +{ + struct devlink_resource *resource; + struct list_head *resource_list; + int err = 0; + + mutex_lock(&devlink->lock); + resource = devlink_resource_find(devlink, NULL, resource_id); + if (resource) { + err = -EINVAL; + goto out; + } + + resource = kzalloc(sizeof(*resource), GFP_KERNEL); + if (!resource) { + err = -ENOMEM; + goto out; + } + + if (top_hierarchy) { + resource_list = &devlink->resource_list; + } else { + struct devlink_resource *parent_resource; + + parent_resource = devlink_resource_find(devlink, NULL, + parent_resource_id); + if (parent_resource) { + resource_list = &parent_resource->resource_list; + resource->parent = parent_resource; + } else { + kfree(resource); + err = -EINVAL; + goto out; + } + } + + resource->name = resource_name; + resource->size = resource_size; + resource->size_new = resource_size; + resource->id = resource_id; + resource->resource_ops = resource_ops; + resource->size_valid = true; + resource->size_params = size_params; + INIT_LIST_HEAD(&resource->resource_list); + list_add_tail(&resource->list, resource_list); +out: + mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devlink_resource_register); + +/** + * devlink_resources_unregister - free all resources + * + * @devlink: devlink + * @resource: resource + */ +void devlink_resources_unregister(struct devlink *devlink, + struct devlink_resource *resource) +{ + struct devlink_resource *tmp, *child_resource; + struct list_head *resource_list; + + if (resource) + resource_list = &resource->resource_list; + else + resource_list = &devlink->resource_list; + + if (!resource) + mutex_lock(&devlink->lock); + + list_for_each_entry_safe(child_resource, tmp, resource_list, list) { + devlink_resources_unregister(devlink, child_resource); + list_del(&child_resource->list); + kfree(child_resource); + } + + if (!resource) + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_resources_unregister); + +/** + * devlink_resource_size_get - get and update size + * + * @devlink: devlink + * @resource_id: the requested resource id + * @p_resource_size: ptr to update + */ +int devlink_resource_size_get(struct devlink *devlink, + u64 resource_id, + u64 *p_resource_size) +{ + struct devlink_resource *resource; + int err = 0; + + mutex_lock(&devlink->lock); + resource = devlink_resource_find(devlink, NULL, resource_id); + if (!resource) { + err = -EINVAL; + goto out; + } + *p_resource_size = resource->size_new; + resource->size = resource->size_new; +out: + mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devlink_resource_size_get); + +/** + * devlink_dpipe_table_resource_set - set the resource id + * + * @devlink: devlink + * @table_name: table name + * @resource_id: resource id + * @resource_units: number of resource's units consumed per table's entry + */ +int devlink_dpipe_table_resource_set(struct devlink *devlink, + const char *table_name, u64 resource_id, + u64 resource_units) +{ + struct devlink_dpipe_table *table; + int err = 0; + + mutex_lock(&devlink->lock); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) { + err = -EINVAL; + goto out; + } + table->resource_id = resource_id; + table->resource_units = resource_units; + table->resource_valid = true; +out: + mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set); + static int __init devlink_module_init(void) { return genl_register_family(&devlink_nl_family); diff --git a/net/core/dst.c b/net/core/dst.c index 662a2d4a3d19..007aa0b08291 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/prefetch.h> #include <net/lwtunnel.h> +#include <net/xfrm.h> #include <net/dst.h> #include <net/dst_metadata.h> @@ -62,15 +63,12 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags) { - dst->child = NULL; dst->dev = dev; if (dev) dev_hold(dev); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; - dst->path = dst; - dst->from = NULL; #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -88,7 +86,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; - dst->next = NULL; if (!(flags & DST_NOCOUNT)) dst_entries_add(ops, 1); } @@ -116,12 +113,17 @@ EXPORT_SYMBOL(dst_alloc); struct dst_entry *dst_destroy(struct dst_entry * dst) { - struct dst_entry *child; + struct dst_entry *child = NULL; smp_rmb(); - child = dst->child; +#ifdef CONFIG_XFRM + if (dst->xfrm) { + struct xfrm_dst *xdst = (struct xfrm_dst *) dst; + child = xdst->child; + } +#endif if (!(dst->flags & DST_NOCOUNT)) dst_entries_add(dst->ops, -1); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 8225416911ae..107b122c8969 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -73,6 +73,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_LLTX_BIT] = "tx-lockless", [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", [NETIF_F_GRO_BIT] = "rx-gro", + [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", [NETIF_F_LRO_BIT] = "rx-lro", [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", @@ -1692,14 +1693,23 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { - struct ethtool_ringparam ringparam; + struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; - if (!dev->ethtool_ops->set_ringparam) + if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) return -EFAULT; + dev->ethtool_ops->get_ringparam(dev, &max); + + /* ensure new ring parameters are within the maximums */ + if (ringparam.rx_pending > max.rx_max_pending || + ringparam.rx_mini_pending > max.rx_mini_max_pending || + ringparam.rx_jumbo_pending > max.rx_jumbo_max_pending || + ringparam.tx_pending > max.tx_max_pending) + return -EINVAL; + return dev->ethtool_ops->set_ringparam(dev, &ringparam); } diff --git a/net/core/filter.c b/net/core/filter.c index 1c0eb436671f..08ab4c65a998 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -401,8 +401,8 @@ do_pass: /* Classic BPF expects A and X to be reset first. These need * to be guaranteed to be the first two instructions. */ - *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); - *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); + *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); + *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); /* All programs must keep CTX in callee saved BPF_REG_CTX. * In eBPF case it's done by the compiler, here we need to @@ -459,8 +459,15 @@ do_pass: break; if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || - fp->code == (BPF_ALU | BPF_MOD | BPF_X)) + fp->code == (BPF_ALU | BPF_MOD | BPF_X)) { *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); + /* Error with exception code on div/mod by 0. + * For cBPF programs, this was always return 0. + */ + *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2); + *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); + *insn++ = BPF_EXIT_INSN(); + } *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; @@ -2686,8 +2693,9 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) return 0; } -int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog) +static int xdp_do_generic_redirect_map(struct net_device *dev, + struct sk_buff *skb, + struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); unsigned long map_owner = ri->map_owner; @@ -2864,7 +2872,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; static unsigned short bpf_tunnel_key_af(u64 flags) @@ -3015,6 +3023,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; if (flags & BPF_F_DONT_FRAGMENT) info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; + if (flags & BPF_F_ZERO_CSUM_TX) + info->key.tun_flags &= ~TUNNEL_CSUM; info->key.tun_id = cpu_to_be64(from->tunnel_id); info->key.tos = from->tunnel_tos; @@ -3028,8 +3038,6 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, IPV6_FLOWLABEL_MASK; } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); - if (flags & BPF_F_ZERO_CSUM_TX) - info->key.tun_flags &= ~TUNNEL_CSUM; } return 0; @@ -3153,7 +3161,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) @@ -3231,6 +3239,29 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, ret = -EINVAL; } #ifdef CONFIG_INET +#if IS_ENABLED(CONFIG_IPV6) + } else if (level == SOL_IPV6) { + if (optlen != sizeof(int) || sk->sk_family != AF_INET6) + return -EINVAL; + + val = *((int *)optval); + /* Only some options are supported */ + switch (optname) { + case IPV6_TCLASS: + if (val < -1 || val > 0xff) { + ret = -EINVAL; + } else { + struct ipv6_pinfo *np = inet6_sk(sk); + + if (val == -1) + val = 0; + np->tclass = val; + } + break; + default: + ret = -EINVAL; + } +#endif } else if (level == SOL_TCP && sk->sk_prot->setsockopt == tcp_setsockopt) { if (optname == TCP_CONGESTION) { @@ -3240,7 +3271,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, strncpy(name, optval, min_t(long, optlen, TCP_CA_NAME_MAX-1)); name[TCP_CA_NAME_MAX-1] = 0; - ret = tcp_set_congestion_control(sk, name, false, reinit); + ret = tcp_set_congestion_control(sk, name, false, + reinit); } else { struct tcp_sock *tp = tcp_sk(sk); @@ -3306,6 +3338,22 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, } else { goto err_clear; } +#if IS_ENABLED(CONFIG_IPV6) + } else if (level == SOL_IPV6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + if (optlen != sizeof(int) || sk->sk_family != AF_INET6) + goto err_clear; + + /* Only some options are supported */ + switch (optname) { + case IPV6_TCLASS: + *((int *)optval) = (int)np->tclass; + break; + default: + goto err_clear; + } +#endif } else { goto err_clear; } @@ -3327,6 +3375,33 @@ static const struct bpf_func_proto bpf_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, + int, argval) +{ + struct sock *sk = bpf_sock->sk; + int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; + + if (!sk_fullsock(sk)) + return -EINVAL; + +#ifdef CONFIG_INET + if (val) + tcp_sk(sk)->bpf_sock_ops_cb_flags = val; + + return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS); +#else + return -EINVAL; +#endif +} + +static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { + .func = bpf_sock_ops_cb_flags_set, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -3459,6 +3534,8 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_xdp_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_csum_diff: + return &bpf_csum_diff_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; case BPF_FUNC_xdp_adjust_meta: @@ -3507,6 +3584,8 @@ static const struct bpf_func_proto * return &bpf_setsockopt_proto; case BPF_FUNC_getsockopt: return &bpf_getsockopt_proto; + case BPF_FUNC_sock_ops_cb_flags_set: + return &bpf_sock_ops_cb_flags_set_proto; case BPF_FUNC_sock_map_update: return &bpf_sock_map_update_proto; default: @@ -3823,34 +3902,44 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -static bool __is_valid_sock_ops_access(int off, int size) +static bool sock_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) { + const int size_default = sizeof(__u32); + if (off < 0 || off >= sizeof(struct bpf_sock_ops)) return false; + /* The verifier guarantees that size > 0. */ if (off % size != 0) return false; - if (size != sizeof(__u32)) - return false; - return true; -} - -static bool sock_ops_is_valid_access(int off, int size, - enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ if (type == BPF_WRITE) { switch (off) { - case offsetof(struct bpf_sock_ops, op) ... - offsetof(struct bpf_sock_ops, replylong[3]): + case offsetof(struct bpf_sock_ops, reply): + case offsetof(struct bpf_sock_ops, sk_txhash): + if (size != size_default) + return false; break; default: return false; } + } else { + switch (off) { + case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received, + bytes_acked): + if (size != sizeof(__u64)) + return false; + break; + default: + if (size != size_default) + return false; + break; + } } - return __is_valid_sock_ops_access(off, size); + return true; } static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, @@ -4305,6 +4394,24 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data_end)); break; + case offsetof(struct xdp_md, ingress_ifindex): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq), + si->dst_reg, si->src_reg, + offsetof(struct xdp_buff, rxq)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev), + si->dst_reg, si->dst_reg, + offsetof(struct xdp_rxq_info, dev)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct net_device, ifindex)); + break; + case offsetof(struct xdp_md, rx_queue_index): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq), + si->dst_reg, si->src_reg, + offsetof(struct xdp_buff, rxq)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct xdp_rxq_info, + queue_index)); + break; } return insn - insn_buf; @@ -4439,6 +4546,211 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_num)); break; + + case offsetof(struct bpf_sock_ops, is_fullsock): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, + is_fullsock), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, + is_fullsock)); + break; + + case offsetof(struct bpf_sock_ops, state): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_state)); + break; + + case offsetof(struct bpf_sock_ops, rtt_min): + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != + sizeof(struct minmax)); + BUILD_BUG_ON(sizeof(struct minmax) < + sizeof(struct minmax_sample)); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct tcp_sock, rtt_min) + + FIELD_SIZEOF(struct minmax_sample, t)); + break; + +/* Helper macro for adding read access to tcp_sock or sock fields. */ +#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ + FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ + OBJ_FIELD), \ + si->dst_reg, si->dst_reg, \ + offsetof(OBJ, OBJ_FIELD)); \ + } while (0) + +/* Helper macro for adding write access to tcp_sock or sock fields. + * The macro is called with two registers, dst_reg which contains a pointer + * to ctx (context) and src_reg which contains the value that should be + * stored. However, we need an additional register since we cannot overwrite + * dst_reg because it may be used later in the program. + * Instead we "borrow" one of the other register. We first save its value + * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore + * it at the end of the macro. + */ +#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ + do { \ + int reg = BPF_REG_9; \ + BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ + FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + reg, si->dst_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + reg, si->dst_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ + reg, si->src_reg, \ + offsetof(OBJ, OBJ_FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + } while (0) + +#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ + do { \ + if (TYPE == BPF_WRITE) \ + SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ + else \ + SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ + } while (0) + + case offsetof(struct bpf_sock_ops, snd_cwnd): + SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, srtt_us): + SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags): + SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, snd_ssthresh): + SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, rcv_nxt): + SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, snd_nxt): + SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, snd_una): + SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, mss_cache): + SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, ecn_flags): + SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, rate_delivered): + SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, rate_interval_us): + SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, packets_out): + SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, retrans_out): + SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, total_retrans): + SOCK_OPS_GET_FIELD(total_retrans, total_retrans, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, segs_in): + SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, data_segs_in): + SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, segs_out): + SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, data_segs_out): + SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, lost_out): + SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, sacked_out): + SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, sk_txhash): + SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, + struct sock, type); + break; + + case offsetof(struct bpf_sock_ops, bytes_received): + SOCK_OPS_GET_FIELD(bytes_received, bytes_received, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, bytes_acked): + SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock); + break; + } return insn - insn_buf; } @@ -4475,6 +4787,7 @@ const struct bpf_verifier_ops sk_filter_verifier_ops = { }; const struct bpf_prog_ops sk_filter_prog_ops = { + .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops tc_cls_act_verifier_ops = { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 544bddf08e13..559db9ea8d86 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -24,6 +24,7 @@ #include <linux/tcp.h> #include <net/flow_dissector.h> #include <scsi/fc/fc_fcoe.h> +#include <uapi/linux/batadv_packet.h> static void dissector_set_key(struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id) @@ -133,10 +134,10 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, ctrl->addr_type = type; } -static void -__skb_flow_dissect_tunnel_info(const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - void *target_container) +void +skb_flow_dissect_tunnel_info(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) { struct ip_tunnel_info *info; struct ip_tunnel_key *key; @@ -212,6 +213,7 @@ __skb_flow_dissect_tunnel_info(const struct sk_buff *skb, tp->dst = key->tp_dst; } } +EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, @@ -436,6 +438,57 @@ __skb_flow_dissect_gre(const struct sk_buff *skb, return FLOW_DISSECT_RET_PROTO_AGAIN; } +/** + * __skb_flow_dissect_batadv() - dissect batman-adv header + * @skb: sk_buff to with the batman-adv header + * @key_control: flow dissectors control key + * @data: raw buffer pointer to the packet, if NULL use skb->data + * @p_proto: pointer used to update the protocol to process next + * @p_nhoff: pointer used to update inner network header offset + * @hlen: packet header length + * @flags: any combination of FLOW_DISSECTOR_F_* + * + * ETH_P_BATMAN packets are tried to be dissected. Only + * &struct batadv_unicast packets are actually processed because they contain an + * inner ethernet header and are usually followed by actual network header. This + * allows the flow dissector to continue processing the packet. + * + * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found, + * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation, + * otherwise FLOW_DISSECT_RET_OUT_BAD + */ +static enum flow_dissect_ret +__skb_flow_dissect_batadv(const struct sk_buff *skb, + struct flow_dissector_key_control *key_control, + void *data, __be16 *p_proto, int *p_nhoff, int hlen, + unsigned int flags) +{ + struct { + struct batadv_unicast_packet batadv_unicast; + struct ethhdr eth; + } *hdr, _hdr; + + hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen, + &_hdr); + if (!hdr) + return FLOW_DISSECT_RET_OUT_BAD; + + if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION) + return FLOW_DISSECT_RET_OUT_BAD; + + if (hdr->batadv_unicast.packet_type != BATADV_UNICAST) + return FLOW_DISSECT_RET_OUT_BAD; + + *p_proto = hdr->eth.h_proto; + *p_nhoff += sizeof(*hdr); + + key_control->flags |= FLOW_DIS_ENCAPSULATION; + if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) + return FLOW_DISSECT_RET_OUT_GOOD; + + return FLOW_DISSECT_RET_PROTO_AGAIN; +} + static void __skb_flow_dissect_tcp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@ -576,9 +629,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_BASIC, target_container); - __skb_flow_dissect_tunnel_info(skb, flow_dissector, - target_container); - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct ethhdr *eth = eth_hdr(skb); @@ -817,6 +867,11 @@ proto_again: nhoff, hlen); break; + case htons(ETH_P_BATMAN): + fdret = __skb_flow_dissect_batadv(skb, key_control, data, + &proto, &nhoff, hlen, flags); + break; + default: fdret = FLOW_DISSECT_RET_OUT_BAD; break; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9834cfa21b21..0a3f88f08727 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -159,7 +159,11 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est->intvl_log = intvl_log; est->cpu_bstats = cpu_bstats; + if (stats_lock) + local_bh_disable(); est_fetch_counters(est, &b); + if (stats_lock) + local_bh_enable(); est->last_bytes = b.bytes; est->last_packets = b.packets; old = rcu_dereference_protected(*rate_est, 1); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 87f28557b329..b2b2323bdc84 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -252,10 +252,10 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, } } -static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *cpu, - const struct gnet_stats_queue *q, - __u32 qlen) +void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *cpu, + const struct gnet_stats_queue *q, + __u32 qlen) { if (cpu) { __gnet_stats_copy_queue_cpu(qstats, cpu); @@ -269,6 +269,7 @@ static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, qstats->qlen = qlen; } +EXPORT_SYMBOL(__gnet_stats_copy_queue); /** * gnet_stats_copy_queue - copy queue statistics into statistics TLV diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 982861607f88..e38e641e98d5 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -92,7 +92,7 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (dev->ifindex != dev_get_iflink(dev)) return true; - if (dev->priv_flags & IFF_TEAM_PORT) + if (netif_is_lag_port(dev) || netif_is_lag_master(dev)) return true; return netif_carrier_ok(dev) && qdisc_tx_changing(dev); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7f831711b6e0..7b7a14abba28 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2862,7 +2862,6 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file) }; static const struct file_operations neigh_stat_seq_fops = { - .owner = THIS_MODULE, .open = neigh_stat_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 615ccab55f38..e010bb800d7b 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -182,7 +182,6 @@ static int dev_seq_open(struct inode *inode, struct file *file) } static const struct file_operations dev_seq_fops = { - .owner = THIS_MODULE, .open = dev_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -202,7 +201,6 @@ static int softnet_seq_open(struct inode *inode, struct file *file) } static const struct file_operations softnet_seq_fops = { - .owner = THIS_MODULE, .open = softnet_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -306,7 +304,6 @@ static int ptype_seq_open(struct inode *inode, struct file *file) } static const struct file_operations ptype_seq_fops = { - .owner = THIS_MODULE, .open = ptype_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -387,7 +384,6 @@ static int dev_mc_seq_open(struct inode *inode, struct file *file) } static const struct file_operations dev_mc_seq_fops = { - .owner = THIS_MODULE, .open = dev_mc_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 799b75268291..60a5ad2c33ee 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -295,10 +295,31 @@ static ssize_t carrier_changes_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); return sprintf(buf, fmt_dec, - atomic_read(&netdev->carrier_changes)); + atomic_read(&netdev->carrier_up_count) + + atomic_read(&netdev->carrier_down_count)); } static DEVICE_ATTR_RO(carrier_changes); +static ssize_t carrier_up_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + + return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_up_count)); +} +static DEVICE_ATTR_RO(carrier_up_count); + +static ssize_t carrier_down_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + + return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_down_count)); +} +static DEVICE_ATTR_RO(carrier_down_count); + /* read-write attributes */ static int change_mtu(struct net_device *dev, unsigned long new_mtu) @@ -325,29 +346,6 @@ static ssize_t flags_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW_RW(flags, fmt_hex); -static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) -{ - unsigned int orig_len = dev->tx_queue_len; - int res; - - if (new_len != (unsigned int)new_len) - return -ERANGE; - - if (new_len != orig_len) { - dev->tx_queue_len = new_len; - res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); - res = notifier_to_errno(res); - if (res) { - netdev_err(dev, - "refused to change device tx_queue_len\n"); - dev->tx_queue_len = orig_len; - return -EFAULT; - } - } - - return 0; -} - static ssize_t tx_queue_len_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -355,7 +353,7 @@ static ssize_t tx_queue_len_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; - return netdev_store(dev, attr, buf, len, change_tx_queue_len); + return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len); } NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); @@ -547,6 +545,8 @@ static struct attribute *net_class_attrs[] __ro_after_init = { &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, &dev_attr_proto_down.attr, + &dev_attr_carrier_up_count.attr, + &dev_attr_carrier_down_count.attr, NULL, }; ATTRIBUTE_GROUPS(net_class); @@ -961,7 +961,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct kobject *kobj = &dev->_rx[i].kobj; - if (!atomic_read(&dev_net(dev)->count)) + if (!refcount_read(&dev_net(dev)->count)) kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); @@ -1367,7 +1367,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct netdev_queue *queue = dev->_tx + i; - if (!atomic_read(&dev_net(dev)->count)) + if (!refcount_read(&dev_net(dev)->count)) queue->kobj.uevent_suppress = 1; #ifdef CONFIG_BQL sysfs_remove_group(&queue->kobj, &dql_group); @@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev) { struct device *dev = &ndev->dev; - if (!atomic_read(&dev_net(ndev)->count)) + if (!refcount_read(&dev_net(ndev)->count)) dev_set_uevent_suppress(dev, 1); kobject_get(&dev->kobj); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 60a71be75aea..3cad5f51afd3 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -35,7 +35,7 @@ LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); struct net init_net = { - .count = ATOMIC_INIT(1), + .count = REFCOUNT_INIT(1), .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), }; EXPORT_SYMBOL(init_net); @@ -221,17 +221,26 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id); */ int peernet2id_alloc(struct net *net, struct net *peer) { - bool alloc; + bool alloc = false, alive = false; int id; - if (atomic_read(&net->count) == 0) + if (refcount_read(&net->count) == 0) return NETNSA_NSID_NOT_ASSIGNED; spin_lock_bh(&net->nsid_lock); - alloc = atomic_read(&peer->count) == 0 ? false : true; + /* + * When peer is obtained from RCU lists, we may race with + * its cleanup. Check whether it's alive, and this guarantees + * we never hash a peer back to net->netns_ids, after it has + * just been idr_remove()'d from there in cleanup_net(). + */ + if (maybe_get_net(peer)) + alive = alloc = true; id = __peernet2id_alloc(net, peer, &alloc); spin_unlock_bh(&net->nsid_lock); if (alloc && id >= 0) rtnl_net_notifyid(net, RTM_NEWNSID, id); + if (alive) + put_net(peer); return id; } EXPORT_SYMBOL_GPL(peernet2id_alloc); @@ -264,11 +273,9 @@ struct net *get_net_ns_by_id(struct net *net, int id) return NULL; rcu_read_lock(); - spin_lock_bh(&net->nsid_lock); peer = idr_find(&net->netns_ids, id); if (peer) peer = maybe_get_net(peer); - spin_unlock_bh(&net->nsid_lock); rcu_read_unlock(); return peer; @@ -284,7 +291,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) int error = 0; LIST_HEAD(net_exit_list); - atomic_set(&net->count, 1); + refcount_set(&net->count, 1); refcount_set(&net->passive, 1); net->dev_base_seq = 1; net->user_ns = user_ns; @@ -432,13 +439,40 @@ struct net *copy_net_ns(unsigned long flags, return net; } +static void unhash_nsid(struct net *net, struct net *last) +{ + struct net *tmp; + /* This function is only called from cleanup_net() work, + * and this work is the only process, that may delete + * a net from net_namespace_list. So, when the below + * is executing, the list may only grow. Thus, we do not + * use for_each_net_rcu() or rtnl_lock(). + */ + for_each_net(tmp) { + int id; + + spin_lock_bh(&tmp->nsid_lock); + id = __peernet2id(tmp, net); + if (id >= 0) + idr_remove(&tmp->netns_ids, id); + spin_unlock_bh(&tmp->nsid_lock); + if (id >= 0) + rtnl_net_notifyid(tmp, RTM_DELNSID, id); + if (tmp == last) + break; + } + spin_lock_bh(&net->nsid_lock); + idr_destroy(&net->netns_ids); + spin_unlock_bh(&net->nsid_lock); +} + static DEFINE_SPINLOCK(cleanup_list_lock); static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ static void cleanup_net(struct work_struct *work) { const struct pernet_operations *ops; - struct net *net, *tmp; + struct net *net, *tmp, *last; struct list_head net_kill_list; LIST_HEAD(net_exit_list); @@ -451,26 +485,25 @@ static void cleanup_net(struct work_struct *work) /* Don't let anyone else find us. */ rtnl_lock(); - list_for_each_entry(net, &net_kill_list, cleanup_list) { + list_for_each_entry(net, &net_kill_list, cleanup_list) list_del_rcu(&net->list); - list_add_tail(&net->exit_list, &net_exit_list); - for_each_net(tmp) { - int id; - - spin_lock_bh(&tmp->nsid_lock); - id = __peernet2id(tmp, net); - if (id >= 0) - idr_remove(&tmp->netns_ids, id); - spin_unlock_bh(&tmp->nsid_lock); - if (id >= 0) - rtnl_net_notifyid(tmp, RTM_DELNSID, id); - } - spin_lock_bh(&net->nsid_lock); - idr_destroy(&net->netns_ids); - spin_unlock_bh(&net->nsid_lock); + /* Cache last net. After we unlock rtnl, no one new net + * added to net_namespace_list can assign nsid pointer + * to a net from net_kill_list (see peernet2id_alloc()). + * So, we skip them in unhash_nsid(). + * + * Note, that unhash_nsid() does not delete nsid links + * between net_kill_list's nets, as they've already + * deleted from net_namespace_list. But, this would be + * useless anyway, as netns_ids are destroyed there. + */ + last = list_last_entry(&net_namespace_list, struct net, list); + rtnl_unlock(); + list_for_each_entry(net, &net_kill_list, cleanup_list) { + unhash_nsid(net, last); + list_add_tail(&net->exit_list, &net_exit_list); } - rtnl_unlock(); /* * Another CPU might be rcu-iterating the list, wait for it. diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f95a15086225..b8ab5c829511 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -184,25 +184,44 @@ #define func_enter() pr_debug("entering %s\n", __func__); +#define PKT_FLAGS \ + pf(IPV6) /* Interface in IPV6 Mode */ \ + pf(IPSRC_RND) /* IP-Src Random */ \ + pf(IPDST_RND) /* IP-Dst Random */ \ + pf(TXSIZE_RND) /* Transmit size is random */ \ + pf(UDPSRC_RND) /* UDP-Src Random */ \ + pf(UDPDST_RND) /* UDP-Dst Random */ \ + pf(UDPCSUM) /* Include UDP checksum */ \ + pf(NO_TIMESTAMP) /* Don't timestamp packets (default TS) */ \ + pf(MPLS_RND) /* Random MPLS labels */ \ + pf(QUEUE_MAP_RND) /* queue map Random */ \ + pf(QUEUE_MAP_CPU) /* queue map mirrors smp_processor_id() */ \ + pf(FLOW_SEQ) /* Sequential flows */ \ + pf(IPSEC) /* ipsec on for flows */ \ + pf(MACSRC_RND) /* MAC-Src Random */ \ + pf(MACDST_RND) /* MAC-Dst Random */ \ + pf(VID_RND) /* Random VLAN ID */ \ + pf(SVID_RND) /* Random SVLAN ID */ \ + pf(NODE) /* Node memory alloc*/ \ + +#define pf(flag) flag##_SHIFT, +enum pkt_flags { + PKT_FLAGS +}; +#undef pf + /* Device flag bits */ -#define F_IPSRC_RND (1<<0) /* IP-Src Random */ -#define F_IPDST_RND (1<<1) /* IP-Dst Random */ -#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ -#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ -#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ -#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ -#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ -#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ -#define F_MPLS_RND (1<<8) /* Random MPLS labels */ -#define F_VID_RND (1<<9) /* Random VLAN ID */ -#define F_SVID_RND (1<<10) /* Random SVLAN ID */ -#define F_FLOW_SEQ (1<<11) /* Sequential flows */ -#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ -#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ -#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ -#define F_NODE (1<<15) /* Node memory alloc*/ -#define F_UDPCSUM (1<<16) /* Include UDP checksum */ -#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */ +#define pf(flag) static const __u32 F_##flag = (1<<flag##_SHIFT); +PKT_FLAGS +#undef pf + +#define pf(flag) __stringify(flag), +static char *pkt_flag_names[] = { + PKT_FLAGS +}; +#undef pf + +#define NR_PKT_FLAGS ARRAY_SIZE(pkt_flag_names) /* Thread control flag bits */ #define T_STOP (1<<0) /* Stop run */ @@ -399,7 +418,7 @@ struct pktgen_dev { __u8 ipsmode; /* IPSEC mode (config) */ __u8 ipsproto; /* IPSEC type (config) */ __u32 spi; - struct dst_entry dst; + struct xfrm_dst xdst; struct dst_ops dstops; #endif char result[512]; @@ -523,7 +542,6 @@ static int pgctrl_open(struct inode *inode, struct file *file) } static const struct file_operations pktgen_fops = { - .owner = THIS_MODULE, .open = pgctrl_open, .read = seq_read, .llseek = seq_lseek, @@ -535,6 +553,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) { const struct pktgen_dev *pkt_dev = seq->private; ktime_t stopped; + unsigned int i; u64 idle; seq_printf(seq, @@ -596,7 +615,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->src_mac_count, pkt_dev->dst_mac_count); if (pkt_dev->nr_labels) { - unsigned int i; seq_puts(seq, " mpls: "); for (i = 0; i < pkt_dev->nr_labels; i++) seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), @@ -632,68 +650,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, " Flags: "); - if (pkt_dev->flags & F_IPV6) - seq_puts(seq, "IPV6 "); - - if (pkt_dev->flags & F_IPSRC_RND) - seq_puts(seq, "IPSRC_RND "); - - if (pkt_dev->flags & F_IPDST_RND) - seq_puts(seq, "IPDST_RND "); - - if (pkt_dev->flags & F_TXSIZE_RND) - seq_puts(seq, "TXSIZE_RND "); - - if (pkt_dev->flags & F_UDPSRC_RND) - seq_puts(seq, "UDPSRC_RND "); - - if (pkt_dev->flags & F_UDPDST_RND) - seq_puts(seq, "UDPDST_RND "); - - if (pkt_dev->flags & F_UDPCSUM) - seq_puts(seq, "UDPCSUM "); - - if (pkt_dev->flags & F_NO_TIMESTAMP) - seq_puts(seq, "NO_TIMESTAMP "); - - if (pkt_dev->flags & F_MPLS_RND) - seq_puts(seq, "MPLS_RND "); - - if (pkt_dev->flags & F_QUEUE_MAP_RND) - seq_puts(seq, "QUEUE_MAP_RND "); + for (i = 0; i < NR_PKT_FLAGS; i++) { + if (i == F_FLOW_SEQ) + if (!pkt_dev->cflows) + continue; - if (pkt_dev->flags & F_QUEUE_MAP_CPU) - seq_puts(seq, "QUEUE_MAP_CPU "); - - if (pkt_dev->cflows) { - if (pkt_dev->flags & F_FLOW_SEQ) - seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/ - else - seq_puts(seq, "FLOW_RND "); - } + if (pkt_dev->flags & (1 << i)) + seq_printf(seq, "%s ", pkt_flag_names[i]); + else if (i == F_FLOW_SEQ) + seq_puts(seq, "FLOW_RND "); #ifdef CONFIG_XFRM - if (pkt_dev->flags & F_IPSEC_ON) { - seq_puts(seq, "IPSEC "); - if (pkt_dev->spi) + if (i == F_IPSEC && pkt_dev->spi) seq_printf(seq, "spi:%u", pkt_dev->spi); - } #endif - - if (pkt_dev->flags & F_MACSRC_RND) - seq_puts(seq, "MACSRC_RND "); - - if (pkt_dev->flags & F_MACDST_RND) - seq_puts(seq, "MACDST_RND "); - - if (pkt_dev->flags & F_VID_RND) - seq_puts(seq, "VID_RND "); - - if (pkt_dev->flags & F_SVID_RND) - seq_puts(seq, "SVID_RND "); - - if (pkt_dev->flags & F_NODE) - seq_puts(seq, "NODE_ALLOC "); + } seq_puts(seq, "\n"); @@ -859,6 +830,35 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev) return i; } +static __u32 pktgen_read_flag(const char *f, bool *disable) +{ + __u32 i; + + if (f[0] == '!') { + *disable = true; + f++; + } + + for (i = 0; i < NR_PKT_FLAGS; i++) { + if (!IS_ENABLED(CONFIG_XFRM) && i == IPSEC_SHIFT) + continue; + + /* allow only disabling ipv6 flag */ + if (!*disable && i == IPV6_SHIFT) + continue; + + if (strcmp(f, pkt_flag_names[i]) == 0) + return 1 << i; + } + + if (strcmp(f, "FLOW_RND") == 0) { + *disable = !*disable; + return F_FLOW_SEQ; + } + + return 0; +} + static ssize_t pktgen_if_write(struct file *file, const char __user * user_buffer, size_t count, loff_t * offset) @@ -1216,7 +1216,10 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "flag")) { + __u32 flag; char f[32]; + bool disable = false; + memset(f, 0, 32); len = strn_len(&user_buffer[i], sizeof(f) - 1); if (len < 0) @@ -1225,107 +1228,15 @@ static ssize_t pktgen_if_write(struct file *file, if (copy_from_user(f, &user_buffer[i], len)) return -EFAULT; i += len; - if (strcmp(f, "IPSRC_RND") == 0) - pkt_dev->flags |= F_IPSRC_RND; - - else if (strcmp(f, "!IPSRC_RND") == 0) - pkt_dev->flags &= ~F_IPSRC_RND; - - else if (strcmp(f, "TXSIZE_RND") == 0) - pkt_dev->flags |= F_TXSIZE_RND; - - else if (strcmp(f, "!TXSIZE_RND") == 0) - pkt_dev->flags &= ~F_TXSIZE_RND; - - else if (strcmp(f, "IPDST_RND") == 0) - pkt_dev->flags |= F_IPDST_RND; - - else if (strcmp(f, "!IPDST_RND") == 0) - pkt_dev->flags &= ~F_IPDST_RND; - - else if (strcmp(f, "UDPSRC_RND") == 0) - pkt_dev->flags |= F_UDPSRC_RND; - - else if (strcmp(f, "!UDPSRC_RND") == 0) - pkt_dev->flags &= ~F_UDPSRC_RND; - - else if (strcmp(f, "UDPDST_RND") == 0) - pkt_dev->flags |= F_UDPDST_RND; - - else if (strcmp(f, "!UDPDST_RND") == 0) - pkt_dev->flags &= ~F_UDPDST_RND; - - else if (strcmp(f, "MACSRC_RND") == 0) - pkt_dev->flags |= F_MACSRC_RND; - - else if (strcmp(f, "!MACSRC_RND") == 0) - pkt_dev->flags &= ~F_MACSRC_RND; - - else if (strcmp(f, "MACDST_RND") == 0) - pkt_dev->flags |= F_MACDST_RND; - - else if (strcmp(f, "!MACDST_RND") == 0) - pkt_dev->flags &= ~F_MACDST_RND; - - else if (strcmp(f, "MPLS_RND") == 0) - pkt_dev->flags |= F_MPLS_RND; - - else if (strcmp(f, "!MPLS_RND") == 0) - pkt_dev->flags &= ~F_MPLS_RND; - else if (strcmp(f, "VID_RND") == 0) - pkt_dev->flags |= F_VID_RND; + flag = pktgen_read_flag(f, &disable); - else if (strcmp(f, "!VID_RND") == 0) - pkt_dev->flags &= ~F_VID_RND; - - else if (strcmp(f, "SVID_RND") == 0) - pkt_dev->flags |= F_SVID_RND; - - else if (strcmp(f, "!SVID_RND") == 0) - pkt_dev->flags &= ~F_SVID_RND; - - else if (strcmp(f, "FLOW_SEQ") == 0) - pkt_dev->flags |= F_FLOW_SEQ; - - else if (strcmp(f, "QUEUE_MAP_RND") == 0) - pkt_dev->flags |= F_QUEUE_MAP_RND; - - else if (strcmp(f, "!QUEUE_MAP_RND") == 0) - pkt_dev->flags &= ~F_QUEUE_MAP_RND; - - else if (strcmp(f, "QUEUE_MAP_CPU") == 0) - pkt_dev->flags |= F_QUEUE_MAP_CPU; - - else if (strcmp(f, "!QUEUE_MAP_CPU") == 0) - pkt_dev->flags &= ~F_QUEUE_MAP_CPU; -#ifdef CONFIG_XFRM - else if (strcmp(f, "IPSEC") == 0) - pkt_dev->flags |= F_IPSEC_ON; -#endif - - else if (strcmp(f, "!IPV6") == 0) - pkt_dev->flags &= ~F_IPV6; - - else if (strcmp(f, "NODE_ALLOC") == 0) - pkt_dev->flags |= F_NODE; - - else if (strcmp(f, "!NODE_ALLOC") == 0) - pkt_dev->flags &= ~F_NODE; - - else if (strcmp(f, "UDPCSUM") == 0) - pkt_dev->flags |= F_UDPCSUM; - - else if (strcmp(f, "!UDPCSUM") == 0) - pkt_dev->flags &= ~F_UDPCSUM; - - else if (strcmp(f, "NO_TIMESTAMP") == 0) - pkt_dev->flags |= F_NO_TIMESTAMP; - - else if (strcmp(f, "!NO_TIMESTAMP") == 0) - pkt_dev->flags &= ~F_NO_TIMESTAMP; - - else { + if (flag) { + if (disable) + pkt_dev->flags &= ~flag; + else + pkt_dev->flags |= flag; + } else { sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, @@ -1804,7 +1715,6 @@ static int pktgen_if_open(struct inode *inode, struct file *file) } static const struct file_operations pktgen_if_fops = { - .owner = THIS_MODULE, .open = pktgen_if_open, .read = seq_read, .llseek = seq_lseek, @@ -1942,7 +1852,6 @@ static int pktgen_thread_open(struct inode *inode, struct file *file) } static const struct file_operations pktgen_thread_fops = { - .owner = THIS_MODULE, .open = pktgen_thread_open, .read = seq_read, .llseek = seq_lseek, @@ -2544,7 +2453,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; #ifdef CONFIG_XFRM - if (pkt_dev->flags & F_IPSEC_ON) + if (pkt_dev->flags & F_IPSEC) get_ipsec_sa(pkt_dev, flow); #endif pkt_dev->nflows++; @@ -2609,7 +2518,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) * supports both transport/tunnel mode + ESP/AH type. */ if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0)) - skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF; + skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF; rcu_read_lock_bh(); err = x->outer_mode->output(x, skb); @@ -2649,7 +2558,7 @@ static void free_SAs(struct pktgen_dev *pkt_dev) static int process_ipsec(struct pktgen_dev *pkt_dev, struct sk_buff *skb, __be16 protocol) { - if (pkt_dev->flags & F_IPSEC_ON) { + if (pkt_dev->flags & F_IPSEC) { struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; int nhead = 0; if (x) { @@ -3742,10 +3651,10 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) * performance under such circumstance. */ pkt_dev->dstops.family = AF_INET; - pkt_dev->dst.dev = pkt_dev->odev; - dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false); - pkt_dev->dst.child = &pkt_dev->dst; - pkt_dev->dst.ops = &pkt_dev->dstops; + pkt_dev->xdst.u.dst.dev = pkt_dev->odev; + dst_init_metrics(&pkt_dev->xdst.u.dst, pktgen_dst_metrics, false); + pkt_dev->xdst.child = &pkt_dev->xdst.u.dst; + pkt_dev->xdst.u.dst.ops = &pkt_dev->dstops; #endif return add_dev_to_thread(t, pkt_dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 778d7f03404a..204297dffd2a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -62,7 +62,9 @@ struct rtnl_link { rtnl_doit_func doit; rtnl_dumpit_func dumpit; + struct module *owner; unsigned int flags; + struct rcu_head rcu; }; static DEFINE_MUTEX(rtnl_mutex); @@ -127,8 +129,7 @@ bool lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ -static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; -static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1]; +static struct rtnl_link *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { @@ -144,72 +145,127 @@ static inline int rtm_msgindex(int msgtype) return msgindex; } -/** - * __rtnl_register - Register a rtnetlink message type - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions - * - * Registers the specified function pointers (at least one of them has - * to be non-NULL) to be called whenever a request message for the - * specified protocol family and message type is received. - * - * The special protocol family PF_UNSPEC may be used to define fallback - * function pointers for the case when no entry for the specific protocol - * family exists. - * - * Returns 0 on success or a negative error code. - */ -int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) +static struct rtnl_link *rtnl_get_link(int protocol, int msgtype) +{ + struct rtnl_link **tab; + + if (protocol >= ARRAY_SIZE(rtnl_msg_handlers)) + protocol = PF_UNSPEC; + + tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]); + if (!tab) + tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]); + + return tab[msgtype]; +} + +static int rtnl_register_internal(struct module *owner, + int protocol, int msgtype, + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + unsigned int flags) { - struct rtnl_link *tab; + struct rtnl_link *link, *old; + struct rtnl_link __rcu **tab; int msgindex; + int ret = -ENOBUFS; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); - tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]); + rtnl_lock(); + tab = rtnl_msg_handlers[protocol]; if (tab == NULL) { - tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL); - if (tab == NULL) - return -ENOBUFS; + tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL); + if (!tab) + goto unlock; + /* ensures we see the 0 stores */ rcu_assign_pointer(rtnl_msg_handlers[protocol], tab); } + old = rtnl_dereference(tab[msgindex]); + if (old) { + link = kmemdup(old, sizeof(*old), GFP_KERNEL); + if (!link) + goto unlock; + } else { + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + goto unlock; + } + + WARN_ON(link->owner && link->owner != owner); + link->owner = owner; + + WARN_ON(doit && link->doit && link->doit != doit); if (doit) - tab[msgindex].doit = doit; + link->doit = doit; + WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit); if (dumpit) - tab[msgindex].dumpit = dumpit; - tab[msgindex].flags |= flags; + link->dumpit = dumpit; - return 0; + link->flags |= flags; + + /* publish protocol:msgtype */ + rcu_assign_pointer(tab[msgindex], link); + ret = 0; + if (old) + kfree_rcu(old, rcu); +unlock: + rtnl_unlock(); + return ret; } -EXPORT_SYMBOL_GPL(__rtnl_register); + +/** + * rtnl_register_module - Register a rtnetlink message type + * + * @owner: module registering the hook (THIS_MODULE) + * @protocol: Protocol family or PF_UNSPEC + * @msgtype: rtnetlink message type + * @doit: Function pointer called for each request message + * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions + * + * Like rtnl_register, but for use by removable modules. + */ +int rtnl_register_module(struct module *owner, + int protocol, int msgtype, + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + unsigned int flags) +{ + return rtnl_register_internal(owner, protocol, msgtype, + doit, dumpit, flags); +} +EXPORT_SYMBOL_GPL(rtnl_register_module); /** * rtnl_register - Register a rtnetlink message type + * @protocol: Protocol family or PF_UNSPEC + * @msgtype: rtnetlink message type + * @doit: Function pointer called for each request message + * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions + * + * Registers the specified function pointers (at least one of them has + * to be non-NULL) to be called whenever a request message for the + * specified protocol family and message type is received. * - * Identical to __rtnl_register() but panics on failure. This is useful - * as failure of this function is very unlikely, it can only happen due - * to lack of memory when allocating the chain to store all message - * handlers for a protocol. Meant for use in init functions where lack - * of memory implies no sense in continuing. + * The special protocol family PF_UNSPEC may be used to define fallback + * function pointers for the case when no entry for the specific protocol + * family exists. */ void rtnl_register(int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit, unsigned int flags) { - if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0) - panic("Unable to register rtnetlink message handler, " - "protocol = %d, message type = %d\n", - protocol, msgtype); + int err; + + err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit, + flags); + if (err) + pr_err("Unable to register rtnetlink message handler, " + "protocol = %d, message type = %d\n", protocol, msgtype); } -EXPORT_SYMBOL_GPL(rtnl_register); /** * rtnl_unregister - Unregister a rtnetlink message type @@ -220,24 +276,25 @@ EXPORT_SYMBOL_GPL(rtnl_register); */ int rtnl_unregister(int protocol, int msgtype) { - struct rtnl_link *handlers; + struct rtnl_link **tab, *link; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); rtnl_lock(); - handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); - if (!handlers) { + tab = rtnl_dereference(rtnl_msg_handlers[protocol]); + if (!tab) { rtnl_unlock(); return -ENOENT; } - handlers[msgindex].doit = NULL; - handlers[msgindex].dumpit = NULL; - handlers[msgindex].flags = 0; + link = tab[msgindex]; + rcu_assign_pointer(tab[msgindex], NULL); rtnl_unlock(); + kfree_rcu(link, rcu); + return 0; } EXPORT_SYMBOL_GPL(rtnl_unregister); @@ -251,20 +308,27 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); */ void rtnl_unregister_all(int protocol) { - struct rtnl_link *handlers; + struct rtnl_link **tab, *link; + int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); rtnl_lock(); - handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); + tab = rtnl_msg_handlers[protocol]; RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); + for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) { + link = tab[msgindex]; + if (!link) + continue; + + rcu_assign_pointer(tab[msgindex], NULL); + kfree_rcu(link, rcu); + } rtnl_unlock(); synchronize_net(); - while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1) - schedule(); - kfree(handlers); + kfree(tab); } EXPORT_SYMBOL_GPL(rtnl_unregister_all); @@ -840,6 +904,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_DROPPED */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_DROPPED */ + nla_total_size_64bit(sizeof(__u64)) + nla_total_size(sizeof(struct ifla_vf_trust))); return size; } else @@ -920,8 +988,11 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ + + nla_total_size(4) /* IFLA_NEW_IFINDEX */ + nla_total_size(1) /* IFLA_PROTO_DOWN */ + nla_total_size(4) /* IFLA_IF_NETNSID */ + + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */ + + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */ + 0; } @@ -1194,7 +1265,11 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, vf_stats.broadcast, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast, IFLA_VF_STATS_PAD)) { + vf_stats.multicast, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED, + vf_stats.rx_dropped, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED, + vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) { nla_nest_cancel(skb, vfstats); goto nla_put_vf_failure; } @@ -1261,6 +1336,7 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) { const struct net_device_ops *ops = dev->netdev_ops; const struct bpf_prog *generic_xdp_prog; + struct netdev_bpf xdp; ASSERT_RTNL(); @@ -1273,7 +1349,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) if (!ops->ndo_bpf) return XDP_ATTACHED_NONE; - return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id); + __dev_xdp_query(dev, ops->ndo_bpf, &xdp); + *prog_id = xdp.prog_id; + + return xdp.prog_attached; } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) @@ -1433,7 +1512,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, - u32 event, int *new_nsid, int tgt_netnsid) + u32 event, int *new_nsid, int new_ifindex, + int tgt_netnsid) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1475,8 +1555,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, - atomic_read(&dev->carrier_changes)) || - nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) + atomic_read(&dev->carrier_up_count) + + atomic_read(&dev->carrier_down_count)) || + nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) || + nla_put_u32(skb, IFLA_CARRIER_UP_COUNT, + atomic_read(&dev->carrier_up_count)) || + nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT, + atomic_read(&dev->carrier_down_count))) goto nla_put_failure; if (event != IFLA_EVENT_NONE) { @@ -1525,6 +1610,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (new_nsid && nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) goto nla_put_failure; + if (new_ifindex && + nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0) + goto nla_put_failure; + rcu_read_lock(); if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) @@ -1569,6 +1658,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PROMISCUITY] = { .type = NLA_U32 }, [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, + [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 }, + [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 }, [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, @@ -1578,6 +1669,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_EVENT] = { .type = NLA_U32 }, [IFLA_GROUP] = { .type = NLA_U32 }, [IFLA_IF_NETNSID] = { .type = NLA_S32 }, + [IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 }, + [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1766,7 +1859,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0, NULL, + ext_filter_mask, 0, NULL, 0, netnsid); if (err < 0) { @@ -1815,6 +1908,49 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) } EXPORT_SYMBOL(rtnl_link_get_net); +/* Figure out which network namespace we are talking about by + * examining the link attributes in the following order: + * + * 1. IFLA_NET_NS_PID + * 2. IFLA_NET_NS_FD + * 3. IFLA_IF_NETNSID + */ +static struct net *rtnl_link_get_net_by_nlattr(struct net *src_net, + struct nlattr *tb[]) +{ + struct net *net; + + if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) + return rtnl_link_get_net(src_net, tb); + + if (!tb[IFLA_IF_NETNSID]) + return get_net(src_net); + + net = get_net_ns_by_id(src_net, nla_get_u32(tb[IFLA_IF_NETNSID])); + if (!net) + return ERR_PTR(-EINVAL); + + return net; +} + +static struct net *rtnl_link_get_net_capable(const struct sk_buff *skb, + struct net *src_net, + struct nlattr *tb[], int cap) +{ + struct net *net; + + net = rtnl_link_get_net_by_nlattr(src_net, tb); + if (IS_ERR(net)) + return net; + + if (!netlink_ns_capable(skb, net->user_ns, cap)) { + put_net(net); + return ERR_PTR(-EPERM); + } + + return net; +} + static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) { if (dev) { @@ -2077,17 +2213,14 @@ static int do_setlink(const struct sk_buff *skb, const struct net_device_ops *ops = dev->netdev_ops; int err; - if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { - struct net *net = rtnl_link_get_net(dev_net(dev), tb); + if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_IF_NETNSID]) { + struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev), + tb, CAP_NET_ADMIN); if (IS_ERR(net)) { err = PTR_ERR(net); goto errout; } - if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { - put_net(net); - err = -EPERM; - goto errout; - } + err = dev_change_net_namespace(dev, net, ifname); put_net(net); if (err) @@ -2204,17 +2337,37 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_TXQLEN]) { unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]); - unsigned int orig_len = dev->tx_queue_len; - - if (dev->tx_queue_len ^ value) { - dev->tx_queue_len = value; - err = call_netdevice_notifiers( - NETDEV_CHANGE_TX_QUEUE_LEN, dev); - err = notifier_to_errno(err); - if (err) { - dev->tx_queue_len = orig_len; - goto errout; - } + + err = dev_change_tx_queue_len(dev, value); + if (err) + goto errout; + status |= DO_SETLINK_MODIFIED; + } + + if (tb[IFLA_GSO_MAX_SIZE]) { + u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]); + + if (max_size > GSO_MAX_SIZE) { + err = -EINVAL; + goto errout; + } + + if (dev->gso_max_size ^ max_size) { + netif_set_gso_max_size(dev, max_size); + status |= DO_SETLINK_MODIFIED; + } + } + + if (tb[IFLA_GSO_MAX_SEGS]) { + u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); + + if (max_segs > GSO_MAX_SEGS) { + err = -EINVAL; + goto errout; + } + + if (dev->gso_max_segs ^ max_segs) { + dev->gso_max_segs = max_segs; status |= DO_SETLINK_MODIFIED; } } @@ -2400,9 +2553,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; - if (tb[IFLA_IF_NETNSID]) - return -EOPNOTSUPP; - if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -2487,36 +2637,53 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - struct net_device *dev; + struct net *tgt_net = net; + struct net_device *dev = NULL; struct ifinfomsg *ifm; char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; int err; + int netnsid = -1; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); if (err < 0) return err; - if (tb[IFLA_IF_NETNSID]) - return -EOPNOTSUPP; - if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + if (tb[IFLA_IF_NETNSID]) { + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); + tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid); + if (IS_ERR(tgt_net)) + return PTR_ERR(tgt_net); + } + + err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(net, ifm->ifi_index); + dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(net, ifname); + dev = __dev_get_by_name(tgt_net, ifname); else if (tb[IFLA_GROUP]) - return rtnl_group_dellink(net, nla_get_u32(tb[IFLA_GROUP])); + err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else - return -EINVAL; + goto out; - if (!dev) - return -ENODEV; + if (!dev) { + if (tb[IFLA_IFNAME] || ifm->ifi_index > 0) + err = -ENODEV; - return rtnl_delete_link(dev); + goto out; + } + + err = rtnl_delete_link(dev); + +out: + if (netnsid >= 0) + put_net(tgt_net); + + return err; } int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) @@ -2583,6 +2750,10 @@ struct net_device *rtnl_create_link(struct net *net, dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + if (tb[IFLA_GSO_MAX_SIZE]) + netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE])); + if (tb[IFLA_GSO_MAX_SEGS]) + dev->gso_max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); return dev; } @@ -2781,14 +2952,10 @@ replay: name_assign_type = NET_NAME_ENUM; } - dest_net = rtnl_link_get_net(net, tb); + dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); if (IS_ERR(dest_net)) return PTR_ERR(dest_net); - err = -EPERM; - if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN)) - goto out; - if (tb[IFLA_LINK_NETNSID]) { int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); @@ -2915,7 +3082,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, err = rtnl_fill_ifinfo(nskb, dev, net, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask, - 0, NULL, netnsid); + 0, NULL, 0, netnsid); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -2973,18 +3140,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) s_idx = 1; for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { + struct rtnl_link **tab; int type = cb->nlh->nlmsg_type-RTM_BASE; - struct rtnl_link *handlers; + struct rtnl_link *link; rtnl_dumpit_func dumpit; if (idx < s_idx || idx == PF_PACKET) continue; - handlers = rtnl_dereference(rtnl_msg_handlers[idx]); - if (!handlers) + if (type < 0 || type >= RTM_NR_MSGTYPES) continue; - dumpit = READ_ONCE(handlers[type].dumpit); + tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]); + if (!tab) + continue; + + link = tab[type]; + if (!link) + continue; + + dumpit = link->dumpit; if (!dumpit) continue; @@ -3003,7 +3178,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, - u32 event, gfp_t flags, int *new_nsid) + u32 event, gfp_t flags, int *new_nsid, + int new_ifindex) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -3016,7 +3192,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), type, 0, 0, change, 0, 0, event, - new_nsid, -1); + new_nsid, new_ifindex, -1); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -3039,14 +3215,15 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) static void rtmsg_ifinfo_event(int type, struct net_device *dev, unsigned int change, u32 event, - gfp_t flags, int *new_nsid) + gfp_t flags, int *new_nsid, int new_ifindex) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; - skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid); + skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid, + new_ifindex); if (skb) rtmsg_ifinfo_send(skb, dev, flags); } @@ -3054,14 +3231,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags) { - rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL); + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, + NULL, 0); } void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, - gfp_t flags, int *new_nsid) + gfp_t flags, int *new_nsid, int new_ifindex) { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, - new_nsid); + new_nsid, new_ifindex); } static int nlmsg_populate_fdb_fill(struct sk_buff *skb, @@ -4314,7 +4492,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - struct rtnl_link *handlers; + struct rtnl_link *link; + struct module *owner; int err = -EOPNOTSUPP; rtnl_doit_func doit; unsigned int flags; @@ -4338,79 +4517,85 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (family >= ARRAY_SIZE(rtnl_msg_handlers)) - family = PF_UNSPEC; - rcu_read_lock(); - handlers = rcu_dereference(rtnl_msg_handlers[family]); - if (!handlers) { - family = PF_UNSPEC; - handlers = rcu_dereference(rtnl_msg_handlers[family]); - } - if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; u16 min_dump_alloc = 0; - dumpit = READ_ONCE(handlers[type].dumpit); - if (!dumpit) { + link = rtnl_get_link(family, type); + if (!link || !link->dumpit) { family = PF_UNSPEC; - handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]); - if (!handlers) - goto err_unlock; - - dumpit = READ_ONCE(handlers[type].dumpit); - if (!dumpit) + link = rtnl_get_link(family, type); + if (!link || !link->dumpit) goto err_unlock; } - - refcount_inc(&rtnl_msg_handlers_ref[family]); + owner = link->owner; + dumpit = link->dumpit; if (type == RTM_GETLINK - RTM_BASE) min_dump_alloc = rtnl_calcit(skb, nlh); + err = 0; + /* need to do this before rcu_read_unlock() */ + if (!try_module_get(owner)) + err = -EPROTONOSUPPORT; + rcu_read_unlock(); rtnl = net->rtnl; - { + if (err == 0) { struct netlink_dump_control c = { .dump = dumpit, .min_dump_alloc = min_dump_alloc, + .module = owner, }; err = netlink_dump_start(rtnl, skb, nlh, &c); + /* netlink_dump_start() will keep a reference on + * module if dump is still in progress. + */ + module_put(owner); } - refcount_dec(&rtnl_msg_handlers_ref[family]); return err; } - doit = READ_ONCE(handlers[type].doit); - if (!doit) { + link = rtnl_get_link(family, type); + if (!link || !link->doit) { family = PF_UNSPEC; - handlers = rcu_dereference(rtnl_msg_handlers[family]); + link = rtnl_get_link(PF_UNSPEC, type); + if (!link || !link->doit) + goto out_unlock; + } + + owner = link->owner; + if (!try_module_get(owner)) { + err = -EPROTONOSUPPORT; + goto out_unlock; } - flags = READ_ONCE(handlers[type].flags); + flags = link->flags; if (flags & RTNL_FLAG_DOIT_UNLOCKED) { - refcount_inc(&rtnl_msg_handlers_ref[family]); - doit = READ_ONCE(handlers[type].doit); + doit = link->doit; rcu_read_unlock(); if (doit) err = doit(skb, nlh, extack); - refcount_dec(&rtnl_msg_handlers_ref[family]); + module_put(owner); return err; } - rcu_read_unlock(); rtnl_lock(); - handlers = rtnl_dereference(rtnl_msg_handlers[family]); - if (handlers) { - doit = READ_ONCE(handlers[type].doit); - if (doit) - err = doit(skb, nlh, extack); - } + link = rtnl_get_link(family, type); + if (link && link->doit) + err = link->doit(skb, nlh, extack); rtnl_unlock(); + + module_put(owner); + + return err; + +out_unlock: + rcu_read_unlock(); return err; err_unlock: @@ -4454,7 +4639,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_CHANGELOWERSTATE: case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), - GFP_KERNEL, NULL); + GFP_KERNEL, NULL, 0); break; default: break; @@ -4498,11 +4683,6 @@ static struct pernet_operations rtnetlink_net_ops = { void __init rtnetlink_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++) - refcount_set(&rtnl_msg_handlers_ref[i], 1); - if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 08f574081315..01e8285aea73 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3656,6 +3656,10 @@ normal: skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; + if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || + skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) + goto err; + while (pos < offset + len) { if (i >= nfrags) { BUG_ON(skb_headlen(list_skb)); @@ -3667,6 +3671,11 @@ normal: BUG_ON(!nfrags); + if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || + skb_zerocopy_clone(nskb, frag_skb, + GFP_ATOMIC)) + goto err; + list_skb = list_skb->next; } @@ -3678,11 +3687,6 @@ normal: goto err; } - if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) - goto err; - if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) - goto err; - *nskb_frag = *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); diff --git a/net/core/sock.c b/net/core/sock.c index 1211159718ad..1033f8ab0547 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -145,6 +145,8 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +static void sock_inuse_add(struct net *net, int val); + /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through @@ -1531,8 +1533,11 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sk->sk_kern_sock = kern; sock_lock_init(sk); sk->sk_net_refcnt = kern ? 0 : 1; - if (likely(sk->sk_net_refcnt)) + if (likely(sk->sk_net_refcnt)) { get_net(net); + sock_inuse_add(net, 1); + } + sock_net_set(sk, net); refcount_set(&sk->sk_wmem_alloc, 1); @@ -1595,6 +1600,9 @@ void sk_destruct(struct sock *sk) static void __sk_free(struct sock *sk) { + if (likely(sk->sk_net_refcnt)) + sock_inuse_add(sock_net(sk), -1); + if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) sock_diag_broadcast_destroy(sk); else @@ -1716,6 +1724,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + if (likely(newsk->sk_net_refcnt)) + sock_inuse_add(sock_net(newsk), 1); /* * Before updating sk_refcnt, we must commit prior changes to memory @@ -3045,7 +3055,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { - __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); + __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); } EXPORT_SYMBOL_GPL(sock_prot_inuse_add); @@ -3055,21 +3065,50 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot) int res = 0; for_each_possible_cpu(cpu) - res += per_cpu_ptr(net->core.inuse, cpu)->val[idx]; + res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx]; return res >= 0 ? res : 0; } EXPORT_SYMBOL_GPL(sock_prot_inuse_get); +static void sock_inuse_add(struct net *net, int val) +{ + this_cpu_add(*net->core.sock_inuse, val); +} + +int sock_inuse_get(struct net *net) +{ + int cpu, res = 0; + + for_each_possible_cpu(cpu) + res += *per_cpu_ptr(net->core.sock_inuse, cpu); + + return res; +} + +EXPORT_SYMBOL_GPL(sock_inuse_get); + static int __net_init sock_inuse_init_net(struct net *net) { - net->core.inuse = alloc_percpu(struct prot_inuse); - return net->core.inuse ? 0 : -ENOMEM; + net->core.prot_inuse = alloc_percpu(struct prot_inuse); + if (net->core.prot_inuse == NULL) + return -ENOMEM; + + net->core.sock_inuse = alloc_percpu(int); + if (net->core.sock_inuse == NULL) + goto out; + + return 0; + +out: + free_percpu(net->core.prot_inuse); + return -ENOMEM; } static void __net_exit sock_inuse_exit_net(struct net *net) { - free_percpu(net->core.inuse); + free_percpu(net->core.prot_inuse); + free_percpu(net->core.sock_inuse); } static struct pernet_operations net_inuse_ops = { @@ -3112,6 +3151,10 @@ static inline void assign_proto_idx(struct proto *prot) static inline void release_proto_idx(struct proto *prot) { } + +static void sock_inuse_add(struct net *net, int val) +{ +} #endif static void req_prot_cleanup(struct request_sock_ops *rsk_prot) @@ -3319,7 +3362,6 @@ static int proto_seq_open(struct inode *inode, struct file *file) } static const struct file_operations proto_seq_fops = { - .owner = THIS_MODULE, .open = proto_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 5eeb1d20cc38..c5bb52bc73a1 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -235,7 +235,9 @@ struct sock *reuseport_select_sock(struct sock *sk, if (prog && skb) sk2 = run_bpf(reuse, socks, prog, skb, hdr_len); - else + + /* no bpf or invalid bpf result: fall back to hash usage */ + if (!sk2) sk2 = reuse->socks[reciprocal_scale(hash, socks)]; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a47ad6cd41c0..f2d0462611c3 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -25,6 +25,7 @@ static int zero = 0; static int one = 1; +static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; @@ -250,6 +251,46 @@ static int proc_do_rss_key(struct ctl_table *table, int write, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } +#ifdef CONFIG_BPF_JIT +static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret, jit_enable = *(int *)table->data; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &jit_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (jit_enable < 2 || + (jit_enable == 2 && bpf_dump_raw_ok())) { + *(int *)table->data = jit_enable; + if (jit_enable == 2) + pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); + } else { + ret = -EPERM; + } + } + return ret; +} + +# ifdef CONFIG_HAVE_EBPF_JIT +static int +proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); +} +# endif +#endif + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -325,13 +366,14 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, -#ifndef CONFIG_BPF_JIT_ALWAYS_ON - .proc_handler = proc_dointvec -#else - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_bpf_enable, +# ifdef CONFIG_BPF_JIT_ALWAYS_ON .extra1 = &one, .extra2 = &one, -#endif +# else + .extra1 = &zero, + .extra2 = &two, +# endif }, # ifdef CONFIG_HAVE_EBPF_JIT { @@ -339,14 +381,18 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_harden, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &zero, + .extra2 = &two, }, { .procname = "bpf_jit_kallsyms", .data = &bpf_jit_kallsyms, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &zero, + .extra2 = &one, }, # endif #endif diff --git a/net/core/xdp.c b/net/core/xdp.c new file mode 100644 index 000000000000..097a0f74e004 --- /dev/null +++ b/net/core/xdp.c @@ -0,0 +1,73 @@ +/* net/core/xdp.c + * + * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + * Released under terms in GPL version 2. See COPYING. + */ +#include <linux/types.h> +#include <linux/mm.h> + +#include <net/xdp.h> + +#define REG_STATE_NEW 0x0 +#define REG_STATE_REGISTERED 0x1 +#define REG_STATE_UNREGISTERED 0x2 +#define REG_STATE_UNUSED 0x3 + +void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) +{ + /* Simplify driver cleanup code paths, allow unreg "unused" */ + if (xdp_rxq->reg_state == REG_STATE_UNUSED) + return; + + WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG"); + + xdp_rxq->reg_state = REG_STATE_UNREGISTERED; + xdp_rxq->dev = NULL; +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg); + +static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq) +{ + memset(xdp_rxq, 0, sizeof(*xdp_rxq)); +} + +/* Returns 0 on success, negative on failure */ +int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, + struct net_device *dev, u32 queue_index) +{ + if (xdp_rxq->reg_state == REG_STATE_UNUSED) { + WARN(1, "Driver promised not to register this"); + return -EINVAL; + } + + if (xdp_rxq->reg_state == REG_STATE_REGISTERED) { + WARN(1, "Missing unregister, handled but fix driver"); + xdp_rxq_info_unreg(xdp_rxq); + } + + if (!dev) { + WARN(1, "Missing net_device from driver"); + return -ENODEV; + } + + /* State either UNREGISTERED or NEW */ + xdp_rxq_info_init(xdp_rxq); + xdp_rxq->dev = dev; + xdp_rxq->queue_index = queue_index; + + xdp_rxq->reg_state = REG_STATE_REGISTERED; + return 0; +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_reg); + +void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq) +{ + xdp_rxq->reg_state = REG_STATE_UNUSED; +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_unused); + +bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq) +{ + return (xdp_rxq->reg_state == REG_STATE_REGISTERED); +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg); diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 8c0ef71bed2f..b270e84d9c13 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -39,23 +39,6 @@ config IP_DCCP_DEBUG Just say N. -config NET_DCCPPROBE - tristate "DCCP connection probing" - depends on PROC_FS && KPROBES - ---help--- - This module allows for capturing the changes to DCCP connection - state in response to incoming packets. It is used for debugging - DCCP congestion avoidance modules. If you don't understand - what was just said, you don't need it: say N. - - Documentation on how to use DCCP connection probing can be found - at: - - http://www.linuxfoundation.org/collaborate/workgroups/networking/dccpprobe - - To compile this code as a module, choose M here: the - module will be called dccp_probe. - endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 2e7b56097bc4..5b4ff37bc806 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -21,9 +21,10 @@ obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o dccp_ipv6-y := ipv6.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o -obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o dccp-$(CONFIG_SYSCTL) += sysctl.o dccp_diag-y := diag.o -dccp_probe-y := probe.o + +# build with local directory for trace.h +CFLAGS_proto.o := -I$(src) diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 3de0d0362d7f..2a24f7d171a5 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -228,7 +228,7 @@ static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets, } if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) { - DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n"); + DCCP_CRIT("Ack Vector buffer overflow: dropping old entries"); av->av_overflow = true; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 178bb9833311..37ccbe62eb1a 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -63,9 +63,10 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) */ local_bh_disable(); inet_twsk_schedule(tw, timeo); - /* Linkage updates. */ - __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); - inet_twsk_put(tw); + /* Linkage updates. + * Note that access to tw after this point is illegal. + */ + inet_twsk_hashdance(tw, sk, &dccp_hashinfo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/dccp/probe.c b/net/dccp/probe.c deleted file mode 100644 index 3d3fda05b32d..000000000000 --- a/net/dccp/probe.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * dccp_probe - Observe the DCCP flow with kprobes. - * - * The idea for this came from Werner Almesberger's umlsim - * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> - * - * Modified for DCCP from Stephen Hemminger's code - * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/kernel.h> -#include <linux/kprobes.h> -#include <linux/socket.h> -#include <linux/dccp.h> -#include <linux/proc_fs.h> -#include <linux/module.h> -#include <linux/kfifo.h> -#include <linux/vmalloc.h> -#include <linux/time64.h> -#include <linux/gfp.h> -#include <net/net_namespace.h> - -#include "dccp.h" -#include "ccid.h" -#include "ccids/ccid3.h" - -static int port; - -static int bufsize = 64 * 1024; - -static const char procname[] = "dccpprobe"; - -static struct { - struct kfifo fifo; - spinlock_t lock; - wait_queue_head_t wait; - struct timespec64 tstart; -} dccpw; - -static void printl(const char *fmt, ...) -{ - va_list args; - int len; - struct timespec64 now; - char tbuf[256]; - - va_start(args, fmt); - getnstimeofday64(&now); - - now = timespec64_sub(now, dccpw.tstart); - - len = sprintf(tbuf, "%lu.%06lu ", - (unsigned long) now.tv_sec, - (unsigned long) now.tv_nsec / NSEC_PER_USEC); - len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); - va_end(args); - - kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); - wake_up(&dccpw.wait); -} - -static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) -{ - const struct inet_sock *inet = inet_sk(sk); - struct ccid3_hc_tx_sock *hc = NULL; - - if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3) - hc = ccid3_hc_tx_sk(sk); - - if (port == 0 || ntohs(inet->inet_dport) == port || - ntohs(inet->inet_sport) == port) { - if (hc) - printl("%pI4:%u %pI4:%u %d %d %d %d %u %llu %llu %d\n", - &inet->inet_saddr, ntohs(inet->inet_sport), - &inet->inet_daddr, ntohs(inet->inet_dport), size, - hc->tx_s, hc->tx_rtt, hc->tx_p, - hc->tx_x_calc, hc->tx_x_recv >> 6, - hc->tx_x >> 6, hc->tx_t_ipi); - else - printl("%pI4:%u %pI4:%u %d\n", - &inet->inet_saddr, ntohs(inet->inet_sport), - &inet->inet_daddr, ntohs(inet->inet_dport), - size); - } - - jprobe_return(); - return 0; -} - -static struct jprobe dccp_send_probe = { - .kp = { - .symbol_name = "dccp_sendmsg", - }, - .entry = jdccp_sendmsg, -}; - -static int dccpprobe_open(struct inode *inode, struct file *file) -{ - kfifo_reset(&dccpw.fifo); - getnstimeofday64(&dccpw.tstart); - return 0; -} - -static ssize_t dccpprobe_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - int error = 0, cnt = 0; - unsigned char *tbuf; - - if (!buf) - return -EINVAL; - - if (len == 0) - return 0; - - tbuf = vmalloc(len); - if (!tbuf) - return -ENOMEM; - - error = wait_event_interruptible(dccpw.wait, - kfifo_len(&dccpw.fifo) != 0); - if (error) - goto out_free; - - cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); - error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; - -out_free: - vfree(tbuf); - - return error ? error : cnt; -} - -static const struct file_operations dccpprobe_fops = { - .owner = THIS_MODULE, - .open = dccpprobe_open, - .read = dccpprobe_read, - .llseek = noop_llseek, -}; - -static __init int dccpprobe_init(void) -{ - int ret = -ENOMEM; - - init_waitqueue_head(&dccpw.wait); - spin_lock_init(&dccpw.lock); - if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) - return ret; - if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) - goto err0; - - ret = register_jprobe(&dccp_send_probe); - if (ret) { - ret = request_module("dccp"); - if (!ret) - ret = register_jprobe(&dccp_send_probe); - } - - if (ret) - goto err1; - - pr_info("DCCP watch registered (port=%d)\n", port); - return 0; -err1: - remove_proc_entry(procname, init_net.proc_net); -err0: - kfifo_free(&dccpw.fifo); - return ret; -} -module_init(dccpprobe_init); - -static __exit void dccpprobe_exit(void) -{ - kfifo_free(&dccpw.fifo); - remove_proc_entry(procname, init_net.proc_net); - unregister_jprobe(&dccp_send_probe); - -} -module_exit(dccpprobe_exit); - -MODULE_PARM_DESC(port, "Port to match (0=all)"); -module_param(port, int, 0); - -MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); -module_param(bufsize, int, 0); - -MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>"); -MODULE_DESCRIPTION("DCCP snooper"); -MODULE_LICENSE("GPL"); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 8b8db3d481bd..74685fecfdb9 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -38,6 +38,9 @@ #include "dccp.h" #include "feat.h" +#define CREATE_TRACE_POINTS +#include "trace.h" + DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); @@ -110,7 +113,7 @@ void dccp_set_state(struct sock *sk, const int state) /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ - sk->sk_state = state; + inet_sk_set_state(sk, state); } EXPORT_SYMBOL_GPL(dccp_set_state); @@ -761,6 +764,8 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int rc, size; long timeo; + trace_dccp_probe(sk, len); + if (len > dp->dccps_mss_cache) return -EMSGSIZE; diff --git a/net/dccp/trace.h b/net/dccp/trace.h new file mode 100644 index 000000000000..5062421beee9 --- /dev/null +++ b/net/dccp/trace.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dccp + +#if !defined(_TRACE_DCCP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DCCP_H + +#include <net/sock.h> +#include "dccp.h" +#include "ccids/ccid3.h" +#include <linux/tracepoint.h> +#include <trace/events/net_probe_common.h> + +TRACE_EVENT(dccp_probe, + + TP_PROTO(struct sock *sk, size_t size), + + TP_ARGS(sk, size), + + TP_STRUCT__entry( + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, size) + __field(__u16, tx_s) + __field(__u32, tx_rtt) + __field(__u32, tx_p) + __field(__u32, tx_x_calc) + __field(__u64, tx_x_recv) + __field(__u64, tx_x) + __field(__u32, tx_t_ipi) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + struct ccid3_hc_tx_sock *hc = NULL; + + if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3) + hc = ccid3_hc_tx_sk(sk); + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + + __entry->size = size; + if (hc) { + __entry->tx_s = hc->tx_s; + __entry->tx_rtt = hc->tx_rtt; + __entry->tx_p = hc->tx_p; + __entry->tx_x_calc = hc->tx_x_calc; + __entry->tx_x_recv = hc->tx_x_recv >> 6; + __entry->tx_x = hc->tx_x >> 6; + __entry->tx_t_ipi = hc->tx_t_ipi; + } else { + __entry->tx_s = 0; + memset(&__entry->tx_rtt, 0, (void *)&__entry->tx_t_ipi - + (void *)&__entry->tx_rtt + + sizeof(__entry->tx_t_ipi)); + } + ), + + TP_printk("src=%pISpc dest=%pISpc size=%d tx_s=%d tx_rtt=%d " + "tx_p=%d tx_x_calc=%u tx_x_recv=%llu tx_x=%llu tx_t_ipi=%d", + __entry->saddr, __entry->daddr, __entry->size, + __entry->tx_s, __entry->tx_rtt, __entry->tx_p, + __entry->tx_x_calc, __entry->tx_x_recv, __entry->tx_x, + __entry->tx_t_ipi) +); + +#endif /* _TRACE_TCP_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace +#include <trace/define_trace.h> diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 9c2dde819817..cc1b505453a8 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2320,7 +2320,6 @@ static int dn_socket_seq_open(struct inode *inode, struct file *file) } static const struct file_operations dn_socket_seq_fops = { - .owner = THIS_MODULE, .open = dn_socket_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 9153247dad28..c9f5e1ebb9c8 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1389,7 +1389,6 @@ static int dn_dev_seq_open(struct inode *inode, struct file *file) } static const struct file_operations dn_dev_seq_fops = { - .owner = THIS_MODULE, .open = dn_dev_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -1418,9 +1417,12 @@ void __init dn_dev_init(void) dn_dev_devices_on(); - rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0); - rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0); - rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR, + dn_nl_newaddr, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR, + dn_nl_deladdr, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR, + NULL, dn_nl_dump_ifaddr, 0); proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index b37a1b833c77..fce94cbd4378 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -792,8 +792,10 @@ void __init dn_fib_init(void) register_dnaddr_notifier(&dn_fib_dnaddr_notifier); - rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0); - rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE, + dn_fib_rtm_newroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE, + dn_fib_rtm_delroute, NULL, 0); } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 528119a5618e..6e37d9e6345e 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -597,7 +597,6 @@ static int dn_neigh_seq_open(struct inode *inode, struct file *file) } static const struct file_operations dn_neigh_seq_fops = { - .owner = THIS_MODULE, .open = dn_neigh_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 324cb9f2f551..ef20b8e31669 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -199,11 +199,11 @@ static void dn_dst_check_expire(struct timer_list *unused) lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { if (atomic_read(&rt->dst.__refcnt) > 1 || (now - rt->dst.lastuse) < expire) { - rtp = &rt->dst.dn_next; + rtp = &rt->dn_next; continue; } - *rtp = rt->dst.dn_next; - rt->dst.dn_next = NULL; + *rtp = rt->dn_next; + rt->dn_next = NULL; dst_dev_put(&rt->dst); dst_release(&rt->dst); } @@ -233,11 +233,11 @@ static int dn_dst_gc(struct dst_ops *ops) lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { if (atomic_read(&rt->dst.__refcnt) > 1 || (now - rt->dst.lastuse) < expire) { - rtp = &rt->dst.dn_next; + rtp = &rt->dn_next; continue; } - *rtp = rt->dst.dn_next; - rt->dst.dn_next = NULL; + *rtp = rt->dn_next; + rt->dn_next = NULL; dst_dev_put(&rt->dst); dst_release(&rt->dst); break; @@ -333,8 +333,8 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { if (compare_keys(&rth->fld, &rt->fld)) { /* Put it first */ - *rthp = rth->dst.dn_next; - rcu_assign_pointer(rth->dst.dn_next, + *rthp = rth->dn_next; + rcu_assign_pointer(rth->dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); @@ -345,10 +345,10 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou *rp = rth; return 0; } - rthp = &rth->dst.dn_next; + rthp = &rth->dn_next; } - rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); + rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); dst_hold_and_use(&rt->dst, now); @@ -369,8 +369,8 @@ static void dn_run_flush(struct timer_list *unused) goto nothing_to_declare; for(; rt; rt = next) { - next = rcu_dereference_raw(rt->dst.dn_next); - RCU_INIT_POINTER(rt->dst.dn_next, NULL); + next = rcu_dereference_raw(rt->dn_next); + RCU_INIT_POINTER(rt->dn_next, NULL); dst_dev_put(&rt->dst); dst_release(&rt->dst); } @@ -1183,6 +1183,7 @@ make_route: if (rt == NULL) goto e_nobufs; + rt->dn_next = NULL; memset(&rt->fld, 0, sizeof(rt->fld)); rt->fld.saddr = oldflp->saddr; rt->fld.daddr = oldflp->daddr; @@ -1252,7 +1253,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn * if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; - rt = rcu_dereference_bh(rt->dst.dn_next)) { + rt = rcu_dereference_bh(rt->dn_next)) { if ((flp->daddr == rt->fld.daddr) && (flp->saddr == rt->fld.saddr) && (flp->flowidn_mark == rt->fld.flowidn_mark) && @@ -1448,6 +1449,7 @@ make_route: if (rt == NULL) goto e_nobufs; + rt->dn_next = NULL; memset(&rt->fld, 0, sizeof(rt->fld)); rt->rt_saddr = fld.saddr; rt->rt_daddr = fld.daddr; @@ -1529,7 +1531,7 @@ static int dn_route_input(struct sk_buff *skb) rcu_read_lock(); for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; - rt = rcu_dereference(rt->dst.dn_next)) { + rt = rcu_dereference(rt->dn_next)) { if ((rt->fld.saddr == cb->src) && (rt->fld.daddr == cb->dst) && (rt->fld.flowidn_oif == 0) && @@ -1749,7 +1751,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock_bh(); for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->dst.dn_next), idx++) { + rt = rcu_dereference_bh(rt->dn_next), idx++) { if (idx < s_idx) continue; skb_dst_set(skb, dst_clone(&rt->dst)); @@ -1795,7 +1797,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou { struct dn_rt_cache_iter_state *s = seq->private; - rt = rcu_dereference_bh(rt->dst.dn_next); + rt = rcu_dereference_bh(rt->dn_next); while (!rt) { rcu_read_unlock_bh(); if (--s->bucket < 0) @@ -1858,7 +1860,6 @@ static int dn_rt_cache_seq_open(struct inode *inode, struct file *file) } static const struct file_operations dn_rt_cache_seq_fops = { - .owner = THIS_MODULE, .open = dn_rt_cache_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -1921,11 +1922,11 @@ void __init dn_route_init(void) &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER - rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, - dn_fib_dump, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE, + dn_cache_getroute, dn_fib_dump, 0); #else - rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, - dn_cache_dump, 0); + rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE, + dn_cache_getroute, dn_cache_dump, 0); #endif } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 03c3bdf25468..bbf2c82cf7b2 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -16,6 +16,15 @@ config NET_DSA if NET_DSA +config NET_DSA_LEGACY + bool "Support for older platform device and Device Tree registration" + default y + ---help--- + Say Y if you want to enable support for the older platform device and + deprecated Device Tree binding registration. + + This feature is scheduled for removal in 4.17. + # tagging formats config NET_DSA_TAG_BRCM bool diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 0e13c1f95d13..9e4d3536f977 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o +dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o +dsa_core-$(CONFIG_NET_DSA_LEGACY) += legacy.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 1e287420ff49..adf50fbc4c13 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -241,7 +241,7 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) for (port = 0; port < ds->num_ports; port++) { dp = &ds->ports[port]; - if (dsa_port_is_user(dp)) + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) dp->cpu_dp = dst->cpu_dp; } } @@ -271,13 +271,12 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_CPU: case DSA_PORT_TYPE_DSA: - err = dsa_port_fixed_link_register_of(dp); + err = dsa_port_link_register_of(dp); if (err) { - dev_err(ds->dev, "failed to register fixed link for port %d.%d\n", + dev_err(ds->dev, "failed to setup link for port %d.%d\n", ds->index, dp->index); return err; } - break; case DSA_PORT_TYPE_USER: err = dsa_slave_create(dp); @@ -301,7 +300,7 @@ static void dsa_port_teardown(struct dsa_port *dp) break; case DSA_PORT_TYPE_CPU: case DSA_PORT_TYPE_DSA: - dsa_port_fixed_link_unregister_of(dp); + dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: if (dp->slave) { diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 7d036696e8c4..70de7895e5b8 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -97,8 +97,17 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); bool dsa_schedule_work(struct work_struct *work); /* legacy.c */ +#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) int dsa_legacy_register(void); void dsa_legacy_unregister(void); +#else +static inline int dsa_legacy_register(void) +{ + return 0; +} + +static inline void dsa_legacy_unregister(void) { } +#endif int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, @@ -157,8 +166,8 @@ int dsa_port_vlan_add(struct dsa_port *dp, struct switchdev_trans *trans); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); -int dsa_port_fixed_link_register_of(struct dsa_port *dp); -void dsa_port_fixed_link_unregister_of(struct dsa_port *dp); +int dsa_port_link_register_of(struct dsa_port *dp); +void dsa_port_link_unregister_of(struct dsa_port *dp); /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 84611d7fcfa2..cb54b81d0bd9 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds) if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - ret = dsa_port_fixed_link_register_of(&ds->ports[port]); + ret = dsa_port_link_register_of(&ds->ports[port]); if (ret) return ret; } @@ -275,7 +275,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) for (port = 0; port < ds->num_ports; port++) { if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - dsa_port_fixed_link_unregister_of(&ds->ports[port]); + dsa_port_link_unregister_of(&ds->ports[port]); } if (ds->slave_mii_bus && ds->ops->phy_read) @@ -718,26 +718,6 @@ static int dsa_resume(struct device *d) } #endif -/* legacy way, bypassing the bridge *****************************************/ -int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid, - u16 flags) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return dsa_port_fdb_add(dp, addr, vid); -} - -int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return dsa_port_fdb_del(dp, addr, vid); -} - static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); static const struct of_device_id dsa_of_match_table[] = { diff --git a/net/dsa/port.c b/net/dsa/port.c index bb4be2679904..7acc1169d75e 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -273,7 +273,56 @@ int dsa_port_vlan_del(struct dsa_port *dp, return 0; } -int dsa_port_fixed_link_register_of(struct dsa_port *dp) +static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable) +{ + struct device_node *port_dn = dp->dn; + struct device_node *phy_dn; + struct dsa_switch *ds = dp->ds; + struct phy_device *phydev; + int port = dp->index; + int err = 0; + + phy_dn = of_parse_phandle(port_dn, "phy-handle", 0); + if (!phy_dn) + return 0; + + phydev = of_phy_find_device(phy_dn); + if (!phydev) { + err = -EPROBE_DEFER; + goto err_put_of; + } + + if (enable) { + err = genphy_config_init(phydev); + if (err < 0) + goto err_put_dev; + + err = genphy_resume(phydev); + if (err < 0) + goto err_put_dev; + + err = genphy_read_status(phydev); + if (err < 0) + goto err_put_dev; + } else { + err = genphy_suspend(phydev); + if (err < 0) + goto err_put_dev; + } + + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); + + dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev)); + +err_put_dev: + put_device(&phydev->mdio.dev); +err_put_of: + of_node_put(phy_dn); + return err; +} + +static int dsa_port_fixed_link_register_of(struct dsa_port *dp) { struct device_node *dn = dp->dn; struct dsa_switch *ds = dp->ds; @@ -282,38 +331,44 @@ int dsa_port_fixed_link_register_of(struct dsa_port *dp) int mode; int err; - if (of_phy_is_fixed_link(dn)) { - err = of_phy_register_fixed_link(dn); - if (err) { - dev_err(ds->dev, - "failed to register the fixed PHY of port %d\n", - port); - return err; - } + err = of_phy_register_fixed_link(dn); + if (err) { + dev_err(ds->dev, + "failed to register the fixed PHY of port %d\n", + port); + return err; + } - phydev = of_phy_find_device(dn); + phydev = of_phy_find_device(dn); - mode = of_get_phy_mode(dn); - if (mode < 0) - mode = PHY_INTERFACE_MODE_NA; - phydev->interface = mode; + mode = of_get_phy_mode(dn); + if (mode < 0) + mode = PHY_INTERFACE_MODE_NA; + phydev->interface = mode; - genphy_config_init(phydev); - genphy_read_status(phydev); + genphy_config_init(phydev); + genphy_read_status(phydev); - if (ds->ops->adjust_link) - ds->ops->adjust_link(ds, port, phydev); + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); - put_device(&phydev->mdio.dev); - } + put_device(&phydev->mdio.dev); return 0; } -void dsa_port_fixed_link_unregister_of(struct dsa_port *dp) +int dsa_port_link_register_of(struct dsa_port *dp) { - struct device_node *dn = dp->dn; + if (of_phy_is_fixed_link(dp->dn)) + return dsa_port_fixed_link_register_of(dp); + else + return dsa_port_setup_phy_of(dp, true); +} - if (of_phy_is_fixed_link(dn)) - of_phy_deregister_fixed_link(dn); +void dsa_port_link_unregister_of(struct dsa_port *dp) +{ + if (of_phy_is_fixed_link(dp->dn)) + of_phy_deregister_fixed_link(dp->dn); + else + dsa_port_setup_phy_of(dp, false); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a95a55f79137..f52307296de4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -708,14 +708,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_mall_tc_entry *mall_tc_entry; __be16 protocol = cls->common.protocol; - struct net *net = dev_net(dev); struct dsa_switch *ds = dp->ds; struct net_device *to_dev; const struct tc_action *a; struct dsa_port *to_dp; int err = -EOPNOTSUPP; LIST_HEAD(actions); - int ifindex; if (!ds->ops->port_mirror_add) return err; @@ -729,8 +727,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { struct dsa_mall_mirror_tc_entry *mirror; - ifindex = tcf_mirred_ifindex(a); - to_dev = __dev_get_by_index(net, ifindex); + to_dev = tcf_mirred_dev(a); if (!to_dev) return -EINVAL; @@ -943,6 +940,26 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .set_rxnfc = dsa_slave_set_rxnfc, }; +/* legacy way, bypassing the bridge *****************************************/ +int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + return dsa_port_fdb_add(dp, addr, vid); +} + +int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + return dsa_port_fdb_del(dp, addr, vid); +} + static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 29608d087a7c..b93511726069 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -83,29 +83,52 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, static int dsa_switch_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - /* Do not care yet about other switch chips of the fabric */ - if (ds->index != info->sw_index) - return 0; + int port = dsa_towards_port(ds, info->sw_index, info->port); if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - return ds->ops->port_fdb_add(ds, info->port, info->addr, - info->vid); + return ds->ops->port_fdb_add(ds, port, info->addr, info->vid); } static int dsa_switch_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - /* Do not care yet about other switch chips of the fabric */ - if (ds->index != info->sw_index) - return 0; + int port = dsa_towards_port(ds, info->sw_index, info->port); if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return ds->ops->port_fdb_del(ds, info->port, info->addr, - info->vid); + return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); +} + +static int +dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds, + const struct switchdev_obj_port_mdb *mdb, + const unsigned long *bitmap) +{ + int port, err; + + if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + return -EOPNOTSUPP; + + for_each_set_bit(port, bitmap, ds->num_ports) { + err = ds->ops->port_mdb_prepare(ds, port, mdb); + if (err) + return err; + } + + return 0; +} + +static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds, + const struct switchdev_obj_port_mdb *mdb, + const unsigned long *bitmap) +{ + int port; + + for_each_set_bit(port, bitmap, ds->num_ports) + ds->ops->port_mdb_add(ds, port, mdb); } static int dsa_switch_mdb_add(struct dsa_switch *ds, @@ -114,7 +137,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, const struct switchdev_obj_port_mdb *mdb = info->mdb; struct switchdev_trans *trans = info->trans; DECLARE_BITMAP(group, ds->num_ports); - int port, err; + int port; /* Build a mask of Multicast group members */ bitmap_zero(group, ds->num_ports); @@ -124,21 +147,10 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, if (dsa_is_dsa_port(ds, port)) set_bit(port, group); - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) - return -EOPNOTSUPP; - - for_each_set_bit(port, group, ds->num_ports) { - err = ds->ops->port_mdb_prepare(ds, port, mdb, trans); - if (err) - return err; - } - - return 0; - } + if (switchdev_trans_ph_prepare(trans)) + return dsa_switch_mdb_prepare_bitmap(ds, mdb, group); - for_each_set_bit(port, group, ds->num_ports) - ds->ops->port_mdb_add(ds, port, mdb, trans); + dsa_switch_mdb_add_bitmap(ds, mdb, group); return 0; } @@ -157,13 +169,43 @@ static int dsa_switch_mdb_del(struct dsa_switch *ds, return 0; } +static int +dsa_switch_vlan_prepare_bitmap(struct dsa_switch *ds, + const struct switchdev_obj_port_vlan *vlan, + const unsigned long *bitmap) +{ + int port, err; + + if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) + return -EOPNOTSUPP; + + for_each_set_bit(port, bitmap, ds->num_ports) { + err = ds->ops->port_vlan_prepare(ds, port, vlan); + if (err) + return err; + } + + return 0; +} + +static void +dsa_switch_vlan_add_bitmap(struct dsa_switch *ds, + const struct switchdev_obj_port_vlan *vlan, + const unsigned long *bitmap) +{ + int port; + + for_each_set_bit(port, bitmap, ds->num_ports) + ds->ops->port_vlan_add(ds, port, vlan); +} + static int dsa_switch_vlan_add(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { const struct switchdev_obj_port_vlan *vlan = info->vlan; struct switchdev_trans *trans = info->trans; DECLARE_BITMAP(members, ds->num_ports); - int port, err; + int port; /* Build a mask of VLAN members */ bitmap_zero(members, ds->num_ports); @@ -173,21 +215,10 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds, if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) set_bit(port, members); - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) - return -EOPNOTSUPP; - - for_each_set_bit(port, members, ds->num_ports) { - err = ds->ops->port_vlan_prepare(ds, port, vlan, trans); - if (err) - return err; - } - - return 0; - } + if (switchdev_trans_ph_prepare(trans)) + return dsa_switch_vlan_prepare_bitmap(ds, vlan, members); - for_each_set_bit(port, members, ds->num_ports) - ds->ops->port_vlan_add(ds, port, vlan, trans); + dsa_switch_vlan_add_bitmap(ds, vlan, members); return 0; } diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index e6e0b7b6025c..2b06bb91318b 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -70,6 +70,18 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, if (skb_cow_head(skb, BRCM_TAG_LEN) < 0) return NULL; + /* The Ethernet switch we are interfaced with needs packets to be at + * least 64 bytes (including FCS) otherwise they will be discarded when + * they enter the switch port logic. When Broadcom tags are enabled, we + * need to make sure that packets are at least 68 bytes + * (including FCS and tag) because the length verification is done after + * the Broadcom tag is stripped off the ingress packet. + * + * Let dsa_slave_xmit() free the SKB + */ + if (__skb_put_padto(skb, ETH_ZLEN + BRCM_TAG_LEN, false)) + return NULL; + skb_push(skb, BRCM_TAG_LEN); if (offset) diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 8475434af7d5..11535bc70743 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -13,10 +13,13 @@ */ #include <linux/etherdevice.h> +#include <linux/if_vlan.h> #include "dsa_priv.h" #define MTK_HDR_LEN 4 +#define MTK_HDR_XMIT_UNTAGGED 0 +#define MTK_HDR_XMIT_TAGGED_TPID_8100 1 #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) @@ -25,20 +28,37 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, { struct dsa_port *dp = dsa_slave_to_port(dev); u8 *mtk_tag; + bool is_vlan_skb = true; - if (skb_cow_head(skb, MTK_HDR_LEN) < 0) - return NULL; - - skb_push(skb, MTK_HDR_LEN); + /* Build the special tag after the MAC Source Address. If VLAN header + * is present, it's required that VLAN header and special tag is + * being combined. Only in this way we can allow the switch can parse + * the both special and VLAN tag at the same time and then look up VLAN + * table with VID. + */ + if (!skb_vlan_tagged(skb)) { + if (skb_cow_head(skb, MTK_HDR_LEN) < 0) + return NULL; - memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); + skb_push(skb, MTK_HDR_LEN); + memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); + is_vlan_skb = false; + } - /* Build the tag after the MAC Source Address */ mtk_tag = skb->data + 2 * ETH_ALEN; - mtk_tag[0] = 0; + + /* Mark tag attribute on special tag insertion to notify hardware + * whether that's a combined special tag with 802.1Q header. + */ + mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 : + MTK_HDR_XMIT_UNTAGGED; mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; - mtk_tag[2] = 0; - mtk_tag[3] = 0; + + /* Tag control information is kept for 802.1Q */ + if (!is_vlan_skb) { + mtk_tag[2] = 0; + mtk_tag[3] = 0; + } return skb; } diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index c6c8ad1d4b6d..47a0a6649a9d 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -43,7 +43,6 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o -obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f00499a46927..c24008daa3d8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -121,6 +121,7 @@ #endif #include <net/l3mdev.h> +#include <trace/events/sock.h> /* The inetsw table contains everything that inet_create needs to * build a new socket. @@ -789,7 +790,8 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int addr_len = 0; int err; - sock_rps_record_flow(sk); + if (likely(!(flags & MSG_ERRQUEUE))) + sock_rps_record_flow(sk); err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); @@ -870,6 +872,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sock *sk = sock->sk; int err = 0; struct net *net = sock_net(sk); + void __user *p = (void __user *)arg; + struct ifreq ifr; + struct rtentry rt; switch (cmd) { case SIOCGSTAMP: @@ -880,8 +885,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; case SIOCADDRT: case SIOCDELRT: + if (copy_from_user(&rt, p, sizeof(struct rtentry))) + return -EFAULT; + err = ip_rt_ioctl(net, cmd, &rt); + break; case SIOCRTMSG: - err = ip_rt_ioctl(net, cmd, (void __user *)arg); + err = -EINVAL; break; case SIOCDARP: case SIOCGARP: @@ -889,17 +898,26 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: - case SIOCSIFADDR: case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: case SIOCGIFNETMASK: - case SIOCSIFNETMASK: case SIOCGIFDSTADDR: + case SIOCGIFPFLAGS: + if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + return -EFAULT; + err = devinet_ioctl(net, cmd, &ifr); + if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq))) + err = -EFAULT; + break; + + case SIOCSIFADDR: + case SIOCSIFBRDADDR: + case SIOCSIFNETMASK: case SIOCSIFDSTADDR: case SIOCSIFPFLAGS: - case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - err = devinet_ioctl(net, cmd, (void __user *)arg); + if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + return -EFAULT; + err = devinet_ioctl(net, cmd, &ifr); break; default: if (sk->sk_prot->ioctl) @@ -1220,6 +1238,19 @@ int inet_sk_rebuild_header(struct sock *sk) } EXPORT_SYMBOL(inet_sk_rebuild_header); +void inet_sk_set_state(struct sock *sk, int state) +{ + trace_inet_sock_set_state(sk, sk->sk_state, state); + sk->sk_state = state; +} +EXPORT_SYMBOL(inet_sk_set_state); + +void inet_sk_state_store(struct sock *sk, int newstate) +{ + trace_inet_sock_set_state(sk, sk->sk_state, newstate); + smp_store_release(&sk->sk_state, newstate); +} + struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 6c231b43974d..f28f06c91ead 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1425,7 +1425,6 @@ static int arp_seq_open(struct inode *inode, struct file *file) } static const struct file_operations arp_seq_fops = { - .owner = THIS_MODULE, .open = arp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7a93359fbc72..40f001782c1b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -946,11 +946,10 @@ static int inet_abc_len(__be32 addr) } -int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) { - struct ifreq ifr; struct sockaddr_in sin_orig; - struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; + struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr; struct in_device *in_dev; struct in_ifaddr **ifap = NULL; struct in_ifaddr *ifa = NULL; @@ -959,22 +958,16 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) int ret = -EFAULT; int tryaddrmatch = 0; - /* - * Fetch the caller's info block into kernel space - */ - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - goto out; - ifr.ifr_name[IFNAMSIZ - 1] = 0; + ifr->ifr_name[IFNAMSIZ - 1] = 0; /* save original address for comparison */ memcpy(&sin_orig, sin, sizeof(*sin)); - colon = strchr(ifr.ifr_name, ':'); + colon = strchr(ifr->ifr_name, ':'); if (colon) *colon = 0; - dev_load(net, ifr.ifr_name); + dev_load(net, ifr->ifr_name); switch (cmd) { case SIOCGIFADDR: /* Get interface address */ @@ -1014,7 +1007,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - dev = __dev_get_by_name(net, ifr.ifr_name); + dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) goto done; @@ -1031,7 +1024,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) This is checked above. */ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if (!strcmp(ifr.ifr_name, ifa->ifa_label) && + if (!strcmp(ifr->ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == ifa->ifa_local) { break; /* found */ @@ -1044,7 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) { for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) - if (!strcmp(ifr.ifr_name, ifa->ifa_label)) + if (!strcmp(ifr->ifr_name, ifa->ifa_label)) break; } } @@ -1055,20 +1048,24 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch (cmd) { case SIOCGIFADDR: /* Get interface address */ + ret = 0; sin->sin_addr.s_addr = ifa->ifa_local; - goto rarok; + break; case SIOCGIFBRDADDR: /* Get the broadcast address */ + ret = 0; sin->sin_addr.s_addr = ifa->ifa_broadcast; - goto rarok; + break; case SIOCGIFDSTADDR: /* Get the destination address */ + ret = 0; sin->sin_addr.s_addr = ifa->ifa_address; - goto rarok; + break; case SIOCGIFNETMASK: /* Get the netmask for the interface */ + ret = 0; sin->sin_addr.s_addr = ifa->ifa_mask; - goto rarok; + break; case SIOCSIFFLAGS: if (colon) { @@ -1076,11 +1073,11 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) break; ret = 0; - if (!(ifr.ifr_flags & IFF_UP)) + if (!(ifr->ifr_flags & IFF_UP)) inet_del_ifa(in_dev, ifap, 1); break; } - ret = dev_change_flags(dev, ifr.ifr_flags); + ret = dev_change_flags(dev, ifr->ifr_flags); break; case SIOCSIFADDR: /* Set interface address (and family) */ @@ -1095,7 +1092,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; INIT_HLIST_NODE(&ifa->hash); if (colon) - memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); + memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); } else { @@ -1182,28 +1179,27 @@ done: rtnl_unlock(); out: return ret; -rarok: - rtnl_unlock(); - ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; - goto out; } -static int inet_gifconf(struct net_device *dev, char __user *buf, int len) +static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; + if (WARN_ON(size > sizeof(struct ifreq))) + goto out; + if (!in_dev) goto out; for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { if (!buf) { - done += sizeof(ifr); + done += size; continue; } - if (len < (int) sizeof(ifr)) + if (len < size) break; memset(&ifr, 0, sizeof(struct ifreq)); strcpy(ifr.ifr_name, ifa->ifa_label); @@ -1212,13 +1208,12 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len) (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = ifa->ifa_local; - if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { + if (copy_to_user(buf + done, &ifr, size)) { done = -EFAULT; break; } - buf += sizeof(struct ifreq); - len -= sizeof(struct ifreq); - done += sizeof(struct ifreq); + len -= size; + done += size; } out: return done; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 61fe6e4d23fc..296d0b956bfe 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -121,14 +121,32 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; + struct xfrm_offload *xo = xfrm_offload(skb); void *tmp; - struct dst_entry *dst = skb_dst(skb); - struct xfrm_state *x = dst->xfrm; + struct xfrm_state *x; + + if (xo && (xo->flags & XFRM_DEV_RESUME)) + x = skb->sp->xvec[skb->sp->len - 1]; + else + x = skb_dst(skb)->xfrm; tmp = ESP_SKB_CB(skb)->tmp; esp_ssg_unref(x, tmp); kfree(tmp); - xfrm_output_resume(skb, err); + + if (xo && (xo->flags & XFRM_DEV_RESUME)) { + if (err) { + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); + kfree_skb(skb); + return; + } + + skb_push(skb, skb->data - skb_mac_header(skb)); + secpath_reset(skb); + xfrm_dev_resume(skb); + } else { + xfrm_output_resume(skb, err); + } } /* Move ESP header back into place. */ @@ -825,17 +843,13 @@ static int esp_init_aead(struct xfrm_state *x) char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; - u32 mask = 0; err = -ENAMETOOLONG; if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) goto error; - if (x->xso.offload_handle) - mask |= CRYPTO_ALG_ASYNC; - - aead = crypto_alloc_aead(aead_name, 0, mask); + aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@ -865,7 +879,6 @@ static int esp_init_authenc(struct xfrm_state *x) char authenc_name[CRYPTO_MAX_ALG_NAME]; unsigned int keylen; int err; - u32 mask = 0; err = -EINVAL; if (!x->ealg) @@ -891,10 +904,7 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; } - if (x->xso.offload_handle) - mask |= CRYPTO_ALG_ASYNC; - - aead = crypto_alloc_aead(authenc_name, 0, mask); + aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 29b333a62ab0..da5635fc52c2 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -109,78 +109,39 @@ static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb) static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { - __u32 seq; - int err = 0; - struct sk_buff *skb2; struct xfrm_state *x; struct ip_esp_hdr *esph; struct crypto_aead *aead; - struct sk_buff *segs = ERR_PTR(-EINVAL); netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); if (!xo) - goto out; + return ERR_PTR(-EINVAL); if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) - goto out; - - seq = xo->seq.low; + return ERR_PTR(-EINVAL); x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); if (esph->spi != x->id.spi) - goto out; + return ERR_PTR(-EINVAL); if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) - goto out; + return ERR_PTR(-EINVAL); __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); skb->encap_hdr_csum = 1; - if (!(features & NETIF_F_HW_ESP)) + if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || + (x->xso.dev != skb->dev)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); - segs = x->outer_mode->gso_segment(x, skb, esp_features); - if (IS_ERR_OR_NULL(segs)) - goto out; - - __skb_pull(skb, skb->data - skb_mac_header(skb)); - - skb2 = segs; - do { - struct sk_buff *nskb = skb2->next; - - xo = xfrm_offload(skb2); - xo->flags |= XFRM_GSO_SEGMENT; - xo->seq.low = seq; - xo->seq.hi = xfrm_replay_seqhi(x, seq); - - if(!(features & NETIF_F_HW_ESP)) - xo->flags |= CRYPTO_FALLBACK; - - x->outer_mode->xmit(x, skb2); + xo->flags |= XFRM_GSO_SEGMENT; - err = x->type_offload->xmit(x, skb2, esp_features); - if (err) { - kfree_skb_list(segs); - return ERR_PTR(err); - } - - if (!skb_is_gso(skb2)) - seq++; - else - seq += skb_shinfo(skb2)->gso_segs; - - skb_push(skb2, skb2->mac_len); - skb2 = nskb; - } while (skb2); - -out: - return segs; + return x->outer_mode->gso_segment(x, skb, esp_features); } static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) @@ -207,6 +168,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; + __u32 seq; esp.inplace = true; @@ -245,23 +207,30 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ return esp.nfrags; } + seq = xo->seq.low; + esph = esp.esph; esph->spi = x->id.spi; skb_push(skb, -skb_network_offset(skb)); if (xo->flags & XFRM_GSO_SEGMENT) { - esph->seq_no = htonl(xo->seq.low); - } else { - ip_hdr(skb)->tot_len = htons(skb->len); - ip_send_check(ip_hdr(skb)); + esph->seq_no = htonl(seq); + + if (!skb_is_gso(skb)) + xo->seq.low++; + else + xo->seq.low += skb_shinfo(skb)->gso_segs; } + esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); + + ip_hdr(skb)->tot_len = htons(skb->len); + ip_send_check(ip_hdr(skb)); + if (hw_offload) return 0; - esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); - err = esp_output_tail(x, skb, &esp); if (err) return err; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 08259d078b1c..f05afaf3235c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -587,10 +587,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, * Handle IP routing ioctl calls. * These are used to manipulate the routing tables */ -int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) { struct fib_config cfg; - struct rtentry rt; int err; switch (cmd) { @@ -599,11 +598,8 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&rt, arg, sizeof(rt))) - return -EFAULT; - rtnl_lock(); - err = rtentry_to_fib_config(net, cmd, &rt, &cfg); + err = rtentry_to_fib_config(net, cmd, rt, &cfg); if (err == 0) { struct fib_table *tb; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5ddc4aefff12..5530cd6fdbc7 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2334,7 +2334,6 @@ static int fib_triestat_seq_open(struct inode *inode, struct file *file) } static const struct file_operations fib_triestat_fops = { - .owner = THIS_MODULE, .open = fib_triestat_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -2521,7 +2520,6 @@ static int fib_trie_seq_open(struct inode *inode, struct file *file) } static const struct file_operations fib_trie_fops = { - .owner = THIS_MODULE, .open = fib_trie_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -2715,7 +2713,6 @@ static int fib_route_seq_open(struct inode *inode, struct file *file) } static const struct file_operations fib_route_fops = { - .owner = THIS_MODULE, .open = fib_route_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2d49717a7421..10f7f74a0831 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2832,7 +2832,6 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file) } static const struct file_operations igmp_mc_seq_fops = { - .owner = THIS_MODULE, .open = igmp_mc_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -2979,7 +2978,6 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file) } static const struct file_operations igmp_mcf_seq_fops = { - .owner = THIS_MODULE, .open = igmp_mcf_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4ca46dc08e63..12410ec6f7f7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -685,7 +685,7 @@ static void reqsk_timer_handler(struct timer_list *t) int max_retries, thresh; u8 defer_accept; - if (sk_state_load(sk_listener) != TCP_LISTEN) + if (inet_sk_state_load(sk_listener) != TCP_LISTEN) goto drop; max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; @@ -783,7 +783,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); - newsk->sk_state = TCP_SYN_RECV; + inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; @@ -877,7 +877,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog) * It is OK, because this socket enters to hash table only * after validation is complete. */ - sk_state_store(sk, TCP_LISTEN); + inet_sk_state_store(sk, TCP_LISTEN); if (!sk->sk_prot->get_port(sk, inet->inet_num)) { inet->inet_sport = htons(inet->inet_num); @@ -888,7 +888,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog) return 0; } - sk->sk_state = TCP_CLOSE; + inet_sk_set_state(sk, TCP_CLOSE); return err; } EXPORT_SYMBOL_GPL(inet_csk_listen_start); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c9c35b61a027..a383f299ce24 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -564,12 +564,18 @@ static int inet_diag_bc_run(const struct nlattr *_bc, case INET_DIAG_BC_JMP: yes = 0; break; + case INET_DIAG_BC_S_EQ: + yes = entry->sport == op[1].no; + break; case INET_DIAG_BC_S_GE: yes = entry->sport >= op[1].no; break; case INET_DIAG_BC_S_LE: yes = entry->sport <= op[1].no; break; + case INET_DIAG_BC_D_EQ: + yes = entry->dport == op[1].no; + break; case INET_DIAG_BC_D_GE: yes = entry->dport >= op[1].no; break; @@ -802,8 +808,10 @@ static int inet_diag_bc_audit(const struct nlattr *attr, if (!valid_devcond(bc, len, &min_len)) return -EINVAL; break; + case INET_DIAG_BC_S_EQ: case INET_DIAG_BC_S_GE: case INET_DIAG_BC_S_LE: + case INET_DIAG_BC_D_EQ: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: if (!valid_port_comparison(bc, len, &min_len)) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e7d15fb0d94d..37b7da0b975d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/wait.h> #include <linux/vmalloc.h> +#include <linux/bootmem.h> #include <net/addrconf.h> #include <net/inet_connection_sock.h> @@ -168,6 +169,60 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) } EXPORT_SYMBOL_GPL(__inet_inherit_port); +static struct inet_listen_hashbucket * +inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk) +{ + u32 hash; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + hash = ipv6_portaddr_hash(sock_net(sk), + &sk->sk_v6_rcv_saddr, + inet_sk(sk)->inet_num); + else +#endif + hash = ipv4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); + return inet_lhash2_bucket(h, hash); +} + +static void inet_hash2(struct inet_hashinfo *h, struct sock *sk) +{ + struct inet_listen_hashbucket *ilb2; + + if (!h->lhash2) + return; + + ilb2 = inet_lhash2_bucket_sk(h, sk); + + spin_lock(&ilb2->lock); + if (sk->sk_reuseport && sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, + &ilb2->head); + else + hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, + &ilb2->head); + ilb2->count++; + spin_unlock(&ilb2->lock); +} + +static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk) +{ + struct inet_listen_hashbucket *ilb2; + + if (!h->lhash2 || + WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node))) + return; + + ilb2 = inet_lhash2_bucket_sk(h, sk); + + spin_lock(&ilb2->lock); + hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node); + ilb2->count--; + spin_unlock(&ilb2->lock); +} + static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, const int dif, const int sdif, bool exact_dif) @@ -207,6 +262,40 @@ static inline int compute_score(struct sock *sk, struct net *net, */ /* called with rcu_read_lock() : No refcount taken on the socket */ +static struct sock *inet_lhash2_lookup(struct net *net, + struct inet_listen_hashbucket *ilb2, + struct sk_buff *skb, int doff, + const __be32 saddr, __be16 sport, + const __be32 daddr, const unsigned short hnum, + const int dif, const int sdif) +{ + bool exact_dif = inet_exact_dif_match(net, skb); + struct inet_connection_sock *icsk; + struct sock *sk, *result = NULL; + int score, hiscore = 0; + u32 phash = 0; + + inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { + sk = (struct sock *)icsk; + score = compute_score(sk, net, hnum, daddr, + dif, sdif, exact_dif); + if (score > hiscore) { + if (sk->sk_reuseport) { + phash = inet_ehashfn(net, daddr, hnum, + saddr, sport); + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; + } + result = sk; + hiscore = score; + } + } + + return result; +} + struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -216,32 +305,57 @@ struct sock *__inet_lookup_listener(struct net *net, { unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore = 0, matches = 0, reuseport = 0; bool exact_dif = inet_exact_dif_match(net, skb); + struct inet_listen_hashbucket *ilb2; struct sock *sk, *result = NULL; + int score, hiscore = 0; + unsigned int hash2; u32 phash = 0; + if (ilb->count <= 10 || !hashinfo->lhash2) + goto port_lookup; + + /* Too many sk in the ilb bucket (which is hashed by port alone). + * Try lhash2 (which is hashed by port and addr) instead. + */ + + hash2 = ipv4_portaddr_hash(net, daddr, hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + result = inet_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + if (result) + return result; + + /* Lookup lhash2 with INADDR_ANY */ + + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + return inet_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + +port_lookup: sk_for_each_rcu(sk, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { phash = inet_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, phash, skb, doff); if (result) return result; - matches = 1; } result = sk; hiscore = score; - } else if (score == hiscore && reuseport) { - matches++; - if (reciprocal_scale(phash, matches) == 0) - result = sk; - phash = next_pseudo_random32(phash); } } return result; @@ -430,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } else { percpu_counter_inc(sk->sk_prot->orphan_count); - sk->sk_state = TCP_CLOSE; + inet_sk_set_state(sk, TCP_CLOSE); sock_set_flag(sk, SOCK_DEAD); inet_csk_destroy_sock(sk); } @@ -483,6 +597,8 @@ int __inet_hash(struct sock *sk, struct sock *osk) hlist_add_tail_rcu(&sk->sk_node, &ilb->head); else hlist_add_head_rcu(&sk->sk_node, &ilb->head); + inet_hash2(hashinfo, sk); + ilb->count++; sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: @@ -509,28 +625,35 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_listen_hashbucket *ilb; spinlock_t *lock; bool listener = false; - int done; if (sk_unhashed(sk)) return; if (sk->sk_state == TCP_LISTEN) { - lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &ilb->lock; listener = true; } else { lock = inet_ehash_lockp(hashinfo, sk->sk_hash); } spin_lock_bh(lock); + if (sk_unhashed(sk)) + goto unlock; + if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - if (listener) - done = __sk_del_node_init(sk); - else - done = __sk_nulls_del_node_init_rcu(sk); - if (done) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + if (listener) { + inet_unhash2(hashinfo, sk); + __sk_del_node_init(sk); + ilb->count--; + } else { + __sk_nulls_del_node_init_rcu(sk); + } + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); +unlock: spin_unlock_bh(lock); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -665,10 +788,37 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); INIT_HLIST_HEAD(&h->listening_hash[i].head); + h->listening_hash[i].count = 0; } + + h->lhash2 = NULL; } EXPORT_SYMBOL_GPL(inet_hashinfo_init); +void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, + unsigned long numentries, int scale, + unsigned long low_limit, + unsigned long high_limit) +{ + unsigned int i; + + h->lhash2 = alloc_large_system_hash(name, + sizeof(*h->lhash2), + numentries, + scale, + 0, + NULL, + &h->lhash2_mask, + low_limit, + high_limit); + + for (i = 0; i <= h->lhash2_mask; i++) { + spin_lock_init(&h->lhash2[i].lock); + INIT_HLIST_HEAD(&h->lhash2[i].head); + h->lhash2[i].count = 0; + } +} + int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { unsigned int locksz = sizeof(spinlock_t); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index b563e0c46bac..c3ea4906d237 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -97,7 +97,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, * Essentially we whip up a timewait bucket, copy the relevant info into it * from the SK, and mess with hash chains and list linkage. */ -void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, +void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo) { const struct inet_sock *inet = inet_sk(sk); @@ -119,18 +119,6 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); - /* - * Step 2: Hash TW into tcp ehash chain. - * Notes : - * - tw_refcnt is set to 4 because : - * - We have one reference from bhash chain. - * - We have one reference from ehash chain. - * - We have one reference from timer. - * - One reference for ourself (our caller will release it). - * We can use atomic_set() because prior spin_lock()/spin_unlock() - * committed into memory all tw fields. - */ - refcount_set(&tw->tw_refcnt, 4); inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ @@ -138,8 +126,19 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock(lock); + + /* tw_refcnt is set to 3 because we have : + * - one reference for bhash chain. + * - one reference for ehash chain. + * - one reference for timer. + * We can use atomic_set() because prior spin_lock()/spin_unlock() + * committed into memory all tw fields. + * Also note that after this point, we lost our implicit reference + * so we are not allowed to use tw anymore. + */ + refcount_set(&tw->tw_refcnt, 3); } -EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); +EXPORT_SYMBOL_GPL(inet_twsk_hashdance); static void tw_timer_handler(struct timer_list *t) { @@ -271,14 +270,14 @@ restart: continue; tw = inet_twsk(sk); if ((tw->tw_family != family) || - atomic_read(&twsk_net(tw)->count)) + refcount_read(&twsk_net(tw)->count)) continue; if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) continue; if (unlikely((tw->tw_family != family) || - atomic_read(&twsk_net(tw)->count))) { + refcount_read(&twsk_net(tw)->count))) { inet_twsk_put(tw); goto restart; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 45ffd3d045d2..6ec670fbbbdd 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -114,7 +114,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); static void erspan_build_header(struct sk_buff *skb, - __be32 id, u32 index, bool truncate); + u32 id, u32 index, + bool truncate, bool is_ipv4); static unsigned int ipgre_net_id __read_mostly; static unsigned int gre_tap_net_id __read_mostly; @@ -255,34 +256,43 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, { struct net *net = dev_net(skb->dev); struct metadata_dst *tun_dst = NULL; + struct erspan_base_hdr *ershdr; + struct erspan_metadata *pkt_md; struct ip_tunnel_net *itn; struct ip_tunnel *tunnel; - struct erspanhdr *ershdr; const struct iphdr *iph; - __be32 index; + int ver; int len; itn = net_generic(net, erspan_net_id); len = gre_hdr_len + sizeof(*ershdr); + /* Check based hdr len */ if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; iph = ip_hdr(skb); - ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); + ver = ershdr->ver; /* The original GRE header does not have key field, * Use ERSPAN 10-bit session ID as key. */ - tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); - index = ershdr->md.index; + tpi->key = cpu_to_be32(get_session_id(ershdr)); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, iph->saddr, iph->daddr, tpi->key); if (tunnel) { + len = gre_hdr_len + erspan_hdr_len(ver); + if (unlikely(!pskb_may_pull(skb, len))) + return PACKET_REJECT; + + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); + pkt_md = (struct erspan_metadata *)(ershdr + 1); + if (__iptunnel_pull_header(skb, - gre_hdr_len + sizeof(*ershdr), + len, htons(ETH_P_TEB), false, false) < 0) goto drop; @@ -303,15 +313,21 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, return PACKET_REJECT; md = ip_tunnel_info_opts(&tun_dst->u.tun_info); - if (!md) - return PACKET_REJECT; + memcpy(md, pkt_md, sizeof(*md)); + md->version = ver; - md->index = index; info = &tun_dst->u.tun_info; info->key.tun_flags |= TUNNEL_ERSPAN_OPT; info->options_len = sizeof(*md); } else { - tunnel->index = ntohl(index); + tunnel->erspan_ver = ver; + if (ver == 1) { + tunnel->index = ntohl(pkt_md->u.index); + } else { + tunnel->dir = pkt_md->u.md2.dir; + tunnel->hwid = get_hwid(&pkt_md->u.md2); + } + } skb_reset_mac_header(skb); @@ -405,14 +421,17 @@ static int gre_rcv(struct sk_buff *skb) if (hdr_len < 0) goto drop; - if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) { + if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || + tpi.proto == htons(ETH_P_ERSPAN2))) { if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; + goto out; } if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; +out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: kfree_skb(skb); @@ -560,6 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, bool truncate = false; struct flowi4 fl; int tunnel_hlen; + int version; __be16 df; tun_info = skb_tunnel_info(skb); @@ -568,9 +588,13 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, goto err_free_skb; key = &tun_info->key; + md = ip_tunnel_info_opts(tun_info); + if (!md) + goto err_free_rt; /* ERSPAN has fixed 8 byte GRE header */ - tunnel_hlen = 8 + sizeof(struct erspanhdr); + version = md->version; + tunnel_hlen = 8 + erspan_hdr_len(version); rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); if (!rt) @@ -584,12 +608,18 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, truncate = true; } - md = ip_tunnel_info_opts(tun_info); - if (!md) + if (version == 1) { + erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), + ntohl(md->u.index), truncate, true); + } else if (version == 2) { + erspan_build_header_v2(skb, + ntohl(tunnel_id_to_key32(key->tun_id)), + md->u.md2.dir, + get_hwid(&md->u.md2), + truncate, true); + } else { goto err_free_rt; - - erspan_build_header(skb, tunnel_id_to_key32(key->tun_id), - ntohl(md->index), truncate); + } gre_build_header(skb, 8, TUNNEL_SEQ, htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); @@ -668,52 +698,6 @@ free_skb: return NETDEV_TX_OK; } -static inline u8 tos_to_cos(u8 tos) -{ - u8 dscp, cos; - - dscp = tos >> 2; - cos = dscp >> 3; - return cos; -} - -static void erspan_build_header(struct sk_buff *skb, - __be32 id, u32 index, bool truncate) -{ - struct iphdr *iphdr = ip_hdr(skb); - struct ethhdr *eth = eth_hdr(skb); - enum erspan_encap_type enc_type; - struct erspanhdr *ershdr; - struct qtag_prefix { - __be16 eth_type; - __be16 tci; - } *qp; - u16 vlan_tci = 0; - - enc_type = ERSPAN_ENCAP_NOVLAN; - - /* If mirrored packet has vlan tag, extract tci and - * perserve vlan header in the mirrored frame. - */ - if (eth->h_proto == htons(ETH_P_8021Q)) { - qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); - vlan_tci = ntohs(qp->tci); - enc_type = ERSPAN_ENCAP_INFRAME; - } - - skb_push(skb, sizeof(*ershdr)); - ershdr = (struct erspanhdr *)skb->data; - memset(ershdr, 0, sizeof(*ershdr)); - - ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) | - (ERSPAN_VERSION << VER_OFFSET)); - ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) | - ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) | - (enc_type << EN_OFFSET & EN_MASK) | - ((truncate << T_OFFSET) & T_MASK)); - ershdr->md.index = htonl(index & INDEX_MASK); -} - static netdev_tx_t erspan_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -737,7 +721,15 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, } /* Push ERSPAN header */ - erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate); + if (tunnel->erspan_ver == 1) + erspan_build_header(skb, ntohl(tunnel->parms.o_key), + tunnel->index, + truncate, true); + else + erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), + tunnel->dir, tunnel->hwid, + truncate, true); + tunnel->parms.o_flags &= ~TUNNEL_KEY; __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); return NETDEV_TX_OK; @@ -1209,13 +1201,32 @@ static int ipgre_netlink_parms(struct net_device *dev, if (data[IFLA_GRE_FWMARK]) *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); - if (data[IFLA_GRE_ERSPAN_INDEX]) { - t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + if (data[IFLA_GRE_ERSPAN_VER]) { + t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); - if (t->index & ~INDEX_MASK) + if (t->erspan_ver != 1 && t->erspan_ver != 2) return -EINVAL; } + if (t->erspan_ver == 1) { + if (data[IFLA_GRE_ERSPAN_INDEX]) { + t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + if (t->index & ~INDEX_MASK) + return -EINVAL; + } + } else if (t->erspan_ver == 2) { + if (data[IFLA_GRE_ERSPAN_DIR]) { + t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); + if (t->dir & ~(DIR_MASK >> DIR_OFFSET)) + return -EINVAL; + } + if (data[IFLA_GRE_ERSPAN_HWID]) { + t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); + if (t->hwid & ~(HWID_MASK >> HWID_OFFSET)) + return -EINVAL; + } + } + return 0; } @@ -1282,7 +1293,7 @@ static int erspan_tunnel_init(struct net_device *dev) tunnel->tun_hlen = 8; tunnel->parms.iph.protocol = IPPROTO_GRE; tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + - sizeof(struct erspanhdr); + erspan_hdr_len(tunnel->erspan_ver); t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; @@ -1413,6 +1424,12 @@ static size_t ipgre_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_GRE_ERSPAN_INDEX */ nla_total_size(4) + + /* IFLA_GRE_ERSPAN_VER */ + nla_total_size(1) + + /* IFLA_GRE_ERSPAN_DIR */ + nla_total_size(1) + + /* IFLA_GRE_ERSPAN_HWID */ + nla_total_size(2) + 0; } @@ -1455,9 +1472,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; } - if (t->index) + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) + goto nla_put_failure; + + if (t->erspan_ver == 1) { if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) goto nla_put_failure; + } else if (t->erspan_ver == 2) { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) + goto nla_put_failure; + } return 0; @@ -1493,6 +1519,9 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 }, [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, + [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 }, + [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 }, + [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 }, }; static struct rtnl_link_ops ipgre_link_ops __read_mostly = { diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 60fb1eb7d7d8..6cc70fa488cb 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -808,6 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, { struct net_device *dev = NULL; int ifindex; + int midx; if (optlen != sizeof(int)) goto e_inval; @@ -823,10 +824,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EADDRNOTAVAIL; if (!dev) break; + + midx = l3mdev_master_ifindex(dev); dev_put(dev); err = -EINVAL; - if (sk->sk_bound_dev_if) + if (sk->sk_bound_dev_if && + (!midx || midx != sk->sk_bound_dev_if)) break; inet->uc_index = ifindex; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6d21068f9b55..d786a8441bce 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -710,9 +710,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - tunnel->fwmark); + if (tunnel->fwmark) { + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + tunnel->fwmark); + } + else { + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + skb->mark); + } if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index abdebca848c9..f75802ad960f 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -329,39 +329,6 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) sin->sin_port = port; } -static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg) -{ - int res; - - mm_segment_t oldfs = get_fs(); - set_fs(get_ds()); - res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg); - set_fs(oldfs); - return res; -} - -static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) -{ - int res; - - mm_segment_t oldfs = get_fs(); - set_fs(get_ds()); - res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg); - set_fs(oldfs); - return res; -} - -static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg) -{ - int res; - - mm_segment_t oldfs = get_fs(); - set_fs(get_ds()); - res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg); - set_fs(oldfs); - return res; -} - /* * Set up interface addresses and routes. */ @@ -375,19 +342,19 @@ static int __init ic_setup_if(void) memset(&ir, 0, sizeof(ir)); strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name); set_sockaddr(sin, ic_myaddr, 0); - if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { + if ((err = devinet_ioctl(&init_net, SIOCSIFADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface address (%d)\n", err); return -1; } set_sockaddr(sin, ic_netmask, 0); - if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) { + if ((err = devinet_ioctl(&init_net, SIOCSIFNETMASK, &ir)) < 0) { pr_err("IP-Config: Unable to set interface netmask (%d)\n", err); return -1; } set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0); - if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { + if ((err = devinet_ioctl(&init_net, SIOCSIFBRDADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface broadcast address (%d)\n", err); return -1; @@ -397,11 +364,11 @@ static int __init ic_setup_if(void) * out, we'll try to muddle along. */ if (ic_dev_mtu != 0) { - strcpy(ir.ifr_name, ic_dev->dev->name); - ir.ifr_mtu = ic_dev_mtu; - if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) + rtnl_lock(); + if ((err = dev_set_mtu(ic_dev->dev, ic_dev_mtu)) < 0) pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", ic_dev_mtu, err); + rtnl_unlock(); } return 0; } @@ -423,7 +390,7 @@ static int __init ic_setup_routes(void) set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0); set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0); rm.rt_flags = RTF_UP | RTF_GATEWAY; - if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) { + if ((err = ip_rt_ioctl(&init_net, SIOCADDRT, &rm)) < 0) { pr_err("IP-Config: Cannot add default route (%d)\n", err); return -1; @@ -1322,7 +1289,6 @@ static int pnp_seq_open(struct inode *indoe, struct file *file) } static const struct file_operations pnp_seq_fops = { - .owner = THIS_MODULE, .open = pnp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index fd5f19c988e4..b05689bbba31 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3022,7 +3022,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) const char *name = vif->dev ? vif->dev->name : "none"; seq_printf(seq, - "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", + "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", vif - mrt->vif_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, @@ -3045,7 +3045,6 @@ static int ipmr_vif_open(struct inode *inode, struct file *file) } static const struct file_operations ipmr_vif_fops = { - .owner = THIS_MODULE, .open = ipmr_vif_open, .read = seq_read, .llseek = seq_lseek, @@ -3198,7 +3197,6 @@ static int ipmr_mfc_open(struct inode *inode, struct file *file) } static const struct file_operations ipmr_mfc_fops = { - .owner = THIS_MODULE, .open = ipmr_mfc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index c0cc6aa8cfaa..e6774ccb7731 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -80,35 +80,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t } EXPORT_SYMBOL(ip_route_me_harder); -/* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - */ - -struct ip_rt_info { - __be32 daddr; - __be32 saddr; - u_int8_t tos; - u_int32_t mark; -}; - -static void nf_ip_saveroute(const struct sk_buff *skb, - struct nf_queue_entry *entry) -{ - struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(skb); - - rt_info->tos = iph->tos; - rt_info->daddr = iph->daddr; - rt_info->saddr = iph->saddr; - rt_info->mark = skb->mark; - } -} - -static int nf_ip_reroute(struct net *net, struct sk_buff *skb, - const struct nf_queue_entry *entry) +int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) { const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); @@ -119,10 +91,12 @@ static int nf_ip_reroute(struct net *net, struct sk_buff *skb, skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(net, skb, RTN_UNSPEC); + return ip_route_me_harder(entry->state.net, skb, + RTN_UNSPEC); } return 0; } +EXPORT_SYMBOL_GPL(nf_ip_reroute); __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol) @@ -155,9 +129,9 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, } EXPORT_SYMBOL(nf_ip_checksum); -static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, unsigned int len, - u_int8_t protocol) +__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, unsigned int len, + u_int8_t protocol) { const struct iphdr *iph = ip_hdr(skb); __sum16 csum = 0; @@ -175,9 +149,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, } return csum; } +EXPORT_SYMBOL_GPL(nf_ip_checksum_partial); -static int nf_ip_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict __always_unused) +int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, + bool strict __always_unused) { struct rtable *rt = ip_route_output_key(net, &fl->u.ip4); if (IS_ERR(rt)) @@ -185,19 +160,4 @@ static int nf_ip_route(struct net *net, struct dst_entry **dst, *dst = &rt->dst; return 0; } - -static const struct nf_afinfo nf_ip_afinfo = { - .family = AF_INET, - .checksum = nf_ip_checksum, - .checksum_partial = nf_ip_checksum_partial, - .route = nf_ip_route, - .saveroute = nf_ip_saveroute, - .reroute = nf_ip_reroute, - .route_key_size = sizeof(struct ip_rt_info), -}; - -static int __init ipv4_netfilter_init(void) -{ - return nf_register_afinfo(&nf_ip_afinfo); -} -subsys_initcall(ipv4_netfilter_init); +EXPORT_SYMBOL_GPL(nf_ip_route); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c11eb1744ab1..5f52236780b4 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -72,11 +72,21 @@ endif # NF_TABLES_IPV4 config NF_TABLES_ARP tristate "ARP nf_tables support" + select NETFILTER_FAMILY_ARP help This option enables the ARP support for nf_tables. endif # NF_TABLES +config NF_FLOW_TABLE_IPV4 + tristate "Netfilter flow table IPv4 module" + depends on NF_CONNTRACK && NF_TABLES + select NF_FLOW_TABLE + help + This option adds the flow table IPv4 support. + + To compile it as a module, choose M here. + config NF_DUP_IPV4 tristate "Netfilter IPv4 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK @@ -148,6 +158,7 @@ config NF_NAT_SNMP_BASIC depends on NF_CONNTRACK_SNMP depends on NETFILTER_ADVANCED default NF_NAT && NF_CONNTRACK_SNMP + select ASN1 ---help--- This module implements an Application Layer Gateway (ALG) for @@ -333,6 +344,7 @@ config IP_NF_TARGET_CLUSTERIP depends on NF_CONNTRACK_IPV4 depends on NETFILTER_ADVANCED select NF_CONNTRACK_MARK + select NETFILTER_FAMILY_ARP help The CLUSTERIP target allows you to build load-balancing clusters of network servers without having a dedicated load-balancing @@ -392,6 +404,7 @@ endif # IP_NF_IPTABLES config IP_NF_ARPTABLES tristate "ARP tables support" select NETFILTER_XTABLES + select NETFILTER_FAMILY_ARP depends on NETFILTER_ADVANCED help arptables is a general, extensible packet identification framework. diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index adcdae358365..2dad20eefd26 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -27,9 +27,15 @@ obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o + +nf_nat_snmp_basic-y := nf_nat_snmp_basic-asn1.o nf_nat_snmp_basic_main.o +nf_nat_snmp_basic-y : nf_nat_snmp_basic-asn1.h nf_nat_snmp_basic-asn1.c obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o +clean-files := nf_nat_snmp_basic-asn1.c nf_nat_snmp_basic-asn1.h + obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o + # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o @@ -43,6 +49,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o +# flow table support +obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o + # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index eb8246c39de0..4ffe302f9b82 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -805,9 +805,8 @@ static int get_info(struct net *net, void __user *user, if (compat) xt_compat_lock(NFPROTO_ARP); #endif - t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), - "arptable_%s", name); - if (t) { + t = xt_request_find_table_lock(net, NFPROTO_ARP, name); + if (!IS_ERR(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -836,7 +835,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = -ENOENT; + ret = PTR_ERR(t); #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(NFPROTO_ARP); @@ -861,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; if (get.size == private->size) @@ -873,7 +872,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); return ret; } @@ -898,10 +897,9 @@ static int __do_replace(struct net *net, const char *name, goto out; } - t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), - "arptable_%s", name); - if (!t) { - ret = -ENOENT; + t = xt_request_find_table_lock(net, NFPROTO_ARP, name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } @@ -1015,8 +1013,8 @@ static int do_add_counters(struct net *net, const void __user *user, return PTR_ERR(paddc); t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); - if (!t) { - ret = -ENOENT; + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free; } @@ -1403,7 +1401,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1418,7 +1416,7 @@ static int compat_get_entries(struct net *net, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); xt_compat_unlock(NFPROTO_ARP); return ret; @@ -1653,7 +1651,6 @@ static int __init arp_tables_init(void) if (ret < 0) goto err4; - pr_info("arp_tables: (C) 2002 David S. Miller\n"); return 0; err4: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cc984d0e0c69..9a71f3149507 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -968,9 +968,8 @@ static int get_info(struct net *net, void __user *user, if (compat) xt_compat_lock(AF_INET); #endif - t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), - "iptable_%s", name); - if (t) { + t = xt_request_find_table_lock(net, AF_INET, name); + if (!IS_ERR(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1000,7 +999,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = -ENOENT; + ret = PTR_ERR(t); #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(AF_INET); @@ -1025,7 +1024,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1036,7 +1035,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); return ret; } @@ -1059,10 +1058,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), - "iptable_%s", name); - if (!t) { - ret = -ENOENT; + t = xt_request_find_table_lock(net, AF_INET, name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } @@ -1176,8 +1174,8 @@ do_add_counters(struct net *net, const void __user *user, return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET, tmp.name); - if (!t) { - ret = -ENOENT; + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free; } @@ -1620,7 +1618,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); @@ -1634,7 +1632,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); xt_compat_unlock(AF_INET); return ret; @@ -1936,7 +1934,6 @@ static int __init ip_tables_init(void) if (ret < 0) goto err5; - pr_info("(C) 2000-2006 Netfilter Core Team\n"); return 0; err5: diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 69060e3abe85..c29a6ca6c6d6 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -776,7 +776,6 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, } static const struct file_operations clusterip_proc_fops = { - .owner = THIS_MODULE, .open = clusterip_proc_open, .read = seq_read, .write = clusterip_proc_write, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 7667f223d7f8..9ac92ea7b93c 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -38,12 +38,6 @@ static unsigned int iptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (state->hook == NF_INET_LOCAL_OUT && - (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr))) - /* root is playing with raw sockets. */ - return NF_ACCEPT; - return ipt_do_table(skb, state, state->net->ipv4.iptable_filter); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index aebdb337fd7e..dea138ca8925 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -49,11 +49,6 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) u_int32_t mark; int err; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - /* Save things which could affect route */ mark = skb->mark; iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index a1a07b338ccf..0f7255cc65ee 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -72,6 +72,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { .hook = iptable_nat_ipv4_in, .pf = NFPROTO_IPV4, + .nat_hook = true, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, }, @@ -79,6 +80,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { .hook = iptable_nat_ipv4_out, .pf = NFPROTO_IPV4, + .nat_hook = true, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, }, @@ -86,6 +88,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { .hook = iptable_nat_ipv4_local_fn, .pf = NFPROTO_IPV4, + .nat_hook = true, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST, }, @@ -93,6 +96,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { .hook = iptable_nat_ipv4_fn, .pf = NFPROTO_IPV4, + .nat_hook = true, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, }, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 2642ecd2645c..960625aabf04 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -3,6 +3,7 @@ * * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/slab.h> @@ -12,6 +13,10 @@ static int __net_init iptable_raw_table_init(struct net *net); +static bool raw_before_defrag __read_mostly; +MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); +module_param(raw_before_defrag, bool, 0000); + static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -21,17 +26,20 @@ static const struct xt_table packet_raw = { .table_init = iptable_raw_table_init, }; +static const struct xt_table packet_raw_before_defrag = { + .name = "raw", + .valid_hooks = RAW_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG, + .table_init = iptable_raw_table_init, +}; + /* The work comes in here from netfilter.c. */ static unsigned int iptable_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (state->hook == NF_INET_LOCAL_OUT && - (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr))) - /* root is playing with raw sockets. */ - return NF_ACCEPT; - return ipt_do_table(skb, state, state->net->ipv4.iptable_raw); } @@ -40,15 +48,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init iptable_raw_table_init(struct net *net) { struct ipt_replace *repl; + const struct xt_table *table = &packet_raw; int ret; + if (raw_before_defrag) + table = &packet_raw_before_defrag; + if (net->ipv4.iptable_raw) return 0; - repl = ipt_alloc_initial_table(&packet_raw); + repl = ipt_alloc_initial_table(table); if (repl == NULL) return -ENOMEM; - ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops, + ret = ipt_register_table(net, table, repl, rawtable_ops, &net->ipv4.iptable_raw); kfree(repl); return ret; @@ -69,8 +81,15 @@ static struct pernet_operations iptable_raw_net_ops = { static int __init iptable_raw_init(void) { int ret; + const struct xt_table *table = &packet_raw; + + if (raw_before_defrag) { + table = &packet_raw_before_defrag; + + pr_info("Enabling raw table before defrag\n"); + } - rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook); + rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook); if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index ff226596e4b5..e5379fe57b64 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -43,12 +43,6 @@ static unsigned int iptable_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (state->hook == NF_INET_LOCAL_OUT && - (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr))) - /* Somebody is playing with raw sockets. */ - return NF_ACCEPT; - return ipt_do_table(skb, state, state->net->ipv4.iptable_security); } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 89af9d88ca21..de213a397ea8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -154,11 +154,6 @@ static unsigned int ipv4_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ return NF_ACCEPT; @@ -368,7 +363,7 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("ip_conntrack"); MODULE_LICENSE("GPL"); -static struct nf_conntrack_l4proto *builtin_l4proto4[] = { +static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = { &nf_conntrack_l4proto_tcp4, &nf_conntrack_l4proto_udp4, &nf_conntrack_l4proto_icmp, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 1849fedd9b81..5c15beafa711 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -22,7 +22,7 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_log.h> -static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; +static const unsigned int nf_ct_icmp_timeout = 30*HZ; static inline struct nf_icmp_net *icmp_pernet(struct net *net) { @@ -351,7 +351,7 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.icmp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = { .l3proto = PF_INET, .l4proto = IPPROTO_ICMP, diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 37fe1616ca0b..a0d3ad60a411 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -78,6 +78,8 @@ static unsigned int ipv4_conntrack_defrag(void *priv, if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; #endif + if (skb->_nfct == IP_CT_UNTRACKED) + return NF_ACCEPT; #endif /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c new file mode 100644 index 000000000000..b2d01eb25f2c --- /dev/null +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -0,0 +1,284 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/rhashtable.h> +#include <linux/ip.h> +#include <linux/netdevice.h> +#include <net/ip.h> +#include <net/neighbour.h> +#include <net/netfilter/nf_flow_table.h> +#include <net/netfilter/nf_tables.h> +/* For layer 4 checksum field offset. */ +#include <linux/tcp.h> +#include <linux/udp.h> + +static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, + __be32 addr, __be32 new_addr) +{ + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || + skb_try_make_writable(skb, thoff + sizeof(*tcph))) + return -1; + + tcph = (void *)(skb_network_header(skb) + thoff); + inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); + + return 0; +} + +static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, + __be32 addr, __be32 new_addr) +{ + struct udphdr *udph; + + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || + skb_try_make_writable(skb, thoff + sizeof(*udph))) + return -1; + + udph = (void *)(skb_network_header(skb) + thoff); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace4(&udph->check, skb, addr, + new_addr, true); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + return 0; +} + +static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, + unsigned int thoff, __be32 addr, + __be32 new_addr) +{ + switch (iph->protocol) { + case IPPROTO_TCP: + if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + case IPPROTO_UDP: + if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + } + + return 0; +} + +static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, + struct iphdr *iph, unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + __be32 addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = iph->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; + iph->saddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = iph->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; + iph->daddr = new_addr; + break; + default: + return -1; + } + csum_replace4(&iph->check, addr, new_addr); + + return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); +} + +static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, + struct iphdr *iph, unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + __be32 addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = iph->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; + iph->daddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = iph->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; + iph->saddr = new_addr; + break; + default: + return -1; + } + + return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); +} + +static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, + enum flow_offload_tuple_dir dir) +{ + struct iphdr *iph = ip_hdr(skb); + unsigned int thoff = iph->ihl * 4; + + if (flow->flags & FLOW_OFFLOAD_SNAT && + (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || + nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) + return -1; + if (flow->flags & FLOW_OFFLOAD_DNAT && + (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || + nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) + return -1; + + return 0; +} + +static bool ip_has_options(unsigned int thoff) +{ + return thoff != sizeof(struct iphdr); +} + +static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + unsigned int thoff; + struct iphdr *iph; + + if (!pskb_may_pull(skb, sizeof(*iph))) + return -1; + + iph = ip_hdr(skb); + thoff = iph->ihl * 4; + + if (ip_is_fragment(iph) || + unlikely(ip_has_options(thoff))) + return -1; + + if (iph->protocol != IPPROTO_TCP && + iph->protocol != IPPROTO_UDP) + return -1; + + thoff = iph->ihl * 4; + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return -1; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v4.s_addr = iph->saddr; + tuple->dst_v4.s_addr = iph->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET; + tuple->l4proto = iph->protocol; + tuple->iifidx = dev->ifindex; + + return 0; +} + +/* Based on ip_exceeds_mtu(). */ +static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) + return false; + + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + return false; + + return true; +} + +static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt) +{ + u32 mtu; + + mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); + if (__nf_flow_exceeds_mtu(skb, mtu)) + return true; + + return false; +} + +unsigned int +nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table = priv; + struct flow_offload_tuple tuple = {}; + enum flow_offload_tuple_dir dir; + struct flow_offload *flow; + struct net_device *outdev; + const struct rtable *rt; + struct iphdr *iph; + __be32 nexthop; + + if (skb->protocol != htons(ETH_P_IP)) + return NF_ACCEPT; + + if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) + return NF_ACCEPT; + + tuplehash = flow_offload_lookup(flow_table, &tuple); + if (tuplehash == NULL) + return NF_ACCEPT; + + outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); + if (!outdev) + return NF_ACCEPT; + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + + rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; + if (unlikely(nf_flow_exceeds_mtu(skb, rt))) + return NF_ACCEPT; + + if (skb_try_make_writable(skb, sizeof(*iph))) + return NF_DROP; + + if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && + nf_flow_nat_ip(flow, skb, dir) < 0) + return NF_DROP; + + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + iph = ip_hdr(skb); + ip_decrease_ttl(iph); + + skb->dev = outdev; + nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); + neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); + + return NF_STOLEN; +} +EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); + +static struct nf_flowtable_type flowtable_ipv4 = { + .family = NFPROTO_IPV4, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, + .hook = nf_flow_offload_ip_hook, + .owner = THIS_MODULE, +}; + +static int __init nf_flow_ipv4_module_init(void) +{ + nft_register_flowtable_type(&flowtable_ipv4); + + return 0; +} + +static void __exit nf_flow_ipv4_module_exit(void) +{ + nft_unregister_flowtable_type(&flowtable_ipv4); +} + +module_init(nf_flow_ipv4_module_init); +module_exit(nf_flow_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NF_FLOWTABLE(AF_INET); diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 0443ca4120b0..f7ff6a364d7b 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -356,11 +356,6 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb, #endif unsigned int ret; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && @@ -396,11 +391,6 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, unsigned int ret; int err; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 new file mode 100644 index 000000000000..24b73268f362 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 @@ -0,0 +1,177 @@ +Message ::= + SEQUENCE { + version + INTEGER ({snmp_version}), + + community + OCTET STRING, + + pdu + PDUs + } + + +ObjectName ::= + OBJECT IDENTIFIER + +ObjectSyntax ::= + CHOICE { + simple + SimpleSyntax, + + application-wide + ApplicationSyntax + } + +SimpleSyntax ::= + CHOICE { + integer-value + INTEGER, + + string-value + OCTET STRING, + + objectID-value + OBJECT IDENTIFIER + } + +ApplicationSyntax ::= + CHOICE { + ipAddress-value + IpAddress, + + counter-value + Counter32, + + timeticks-value + TimeTicks, + + arbitrary-value + Opaque, + + big-counter-value + Counter64, + + unsigned-integer-value + Unsigned32 + } + +IpAddress ::= + [APPLICATION 0] + IMPLICIT OCTET STRING OPTIONAL ({snmp_helper}) + +Counter32 ::= + [APPLICATION 1] + IMPLICIT INTEGER OPTIONAL + +Unsigned32 ::= + [APPLICATION 2] + IMPLICIT INTEGER OPTIONAL + +Gauge32 ::= Unsigned32 OPTIONAL + +TimeTicks ::= + [APPLICATION 3] + IMPLICIT INTEGER OPTIONAL + +Opaque ::= + [APPLICATION 4] + IMPLICIT OCTET STRING OPTIONAL + +Counter64 ::= + [APPLICATION 6] + IMPLICIT INTEGER OPTIONAL + +PDUs ::= + CHOICE { + get-request + GetRequest-PDU, + + get-next-request + GetNextRequest-PDU, + + get-bulk-request + GetBulkRequest-PDU, + + response + Response-PDU, + + set-request + SetRequest-PDU, + + inform-request + InformRequest-PDU, + + snmpV2-trap + SNMPv2-Trap-PDU, + + report + Report-PDU + } + +GetRequest-PDU ::= + [0] IMPLICIT PDU OPTIONAL + +GetNextRequest-PDU ::= + [1] IMPLICIT PDU OPTIONAL + +Response-PDU ::= + [2] IMPLICIT PDU OPTIONAL + +SetRequest-PDU ::= + [3] IMPLICIT PDU OPTIONAL + +-- [4] is obsolete + +GetBulkRequest-PDU ::= + [5] IMPLICIT PDU OPTIONAL + +InformRequest-PDU ::= + [6] IMPLICIT PDU OPTIONAL + +SNMPv2-Trap-PDU ::= + [7] IMPLICIT PDU OPTIONAL + +Report-PDU ::= + [8] IMPLICIT PDU OPTIONAL + +PDU ::= + SEQUENCE { + request-id + INTEGER, + + error-status + INTEGER, + + error-index + INTEGER, + + variable-bindings + VarBindList + } + + +VarBind ::= + SEQUENCE { + name + ObjectName, + + CHOICE { + value + ObjectSyntax, + + unSpecified + NULL, + + noSuchObject + [0] IMPLICIT NULL, + + noSuchInstance + [1] IMPLICIT NULL, + + endOfMibView + [2] IMPLICIT NULL + } +} + +VarBindList ::= SEQUENCE OF VarBind diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c deleted file mode 100644 index d5b1e0b3f687..000000000000 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ /dev/null @@ -1,1286 +0,0 @@ -/* - * nf_nat_snmp_basic.c - * - * Basic SNMP Application Layer Gateway - * - * This IP NAT module is intended for use with SNMP network - * discovery and monitoring applications where target networks use - * conflicting private address realms. - * - * Static NAT is used to remap the networks from the view of the network - * management system at the IP layer, and this module remaps some application - * layer addresses to match. - * - * The simplest form of ALG is performed, where only tagged IP addresses - * are modified. The module does not need to be MIB aware and only scans - * messages at the ASN.1/BER level. - * - * Currently, only SNMPv1 and SNMPv2 are supported. - * - * More information on ALG and associated issues can be found in - * RFC 2962 - * - * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory - * McLean & Jochen Friedrich, stripped down for use in the kernel. - * - * Copyright (c) 2000 RP Internet (www.rpi.net.au). - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - * Author: James Morris <jmorris@intercode.com.au> - * - * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net> - */ -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <net/checksum.h> -#include <net/udp.h> - -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_conntrack_expect.h> -#include <net/netfilter/nf_conntrack_helper.h> -#include <net/netfilter/nf_nat_helper.h> -#include <linux/netfilter/nf_conntrack_snmp.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); -MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); -MODULE_ALIAS("ip_nat_snmp_basic"); - -#define SNMP_PORT 161 -#define SNMP_TRAP_PORT 162 -#define NOCT1(n) (*(u8 *)(n)) - -static int debug; -static DEFINE_SPINLOCK(snmp_lock); - -/* - * Application layer address mapping mimics the NAT mapping, but - * only for the first octet in this case (a more flexible system - * can be implemented if needed). - */ -struct oct1_map -{ - u_int8_t from; - u_int8_t to; -}; - - -/***************************************************************************** - * - * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse) - * - *****************************************************************************/ - -/* Class */ -#define ASN1_UNI 0 /* Universal */ -#define ASN1_APL 1 /* Application */ -#define ASN1_CTX 2 /* Context */ -#define ASN1_PRV 3 /* Private */ - -/* Tag */ -#define ASN1_EOC 0 /* End Of Contents */ -#define ASN1_BOL 1 /* Boolean */ -#define ASN1_INT 2 /* Integer */ -#define ASN1_BTS 3 /* Bit String */ -#define ASN1_OTS 4 /* Octet String */ -#define ASN1_NUL 5 /* Null */ -#define ASN1_OJI 6 /* Object Identifier */ -#define ASN1_OJD 7 /* Object Description */ -#define ASN1_EXT 8 /* External */ -#define ASN1_SEQ 16 /* Sequence */ -#define ASN1_SET 17 /* Set */ -#define ASN1_NUMSTR 18 /* Numerical String */ -#define ASN1_PRNSTR 19 /* Printable String */ -#define ASN1_TEXSTR 20 /* Teletext String */ -#define ASN1_VIDSTR 21 /* Video String */ -#define ASN1_IA5STR 22 /* IA5 String */ -#define ASN1_UNITIM 23 /* Universal Time */ -#define ASN1_GENTIM 24 /* General Time */ -#define ASN1_GRASTR 25 /* Graphical String */ -#define ASN1_VISSTR 26 /* Visible String */ -#define ASN1_GENSTR 27 /* General String */ - -/* Primitive / Constructed methods*/ -#define ASN1_PRI 0 /* Primitive */ -#define ASN1_CON 1 /* Constructed */ - -/* - * Error codes. - */ -#define ASN1_ERR_NOERROR 0 -#define ASN1_ERR_DEC_EMPTY 2 -#define ASN1_ERR_DEC_EOC_MISMATCH 3 -#define ASN1_ERR_DEC_LENGTH_MISMATCH 4 -#define ASN1_ERR_DEC_BADVALUE 5 - -/* - * ASN.1 context. - */ -struct asn1_ctx -{ - int error; /* Error condition */ - unsigned char *pointer; /* Octet just to be decoded */ - unsigned char *begin; /* First octet */ - unsigned char *end; /* Octet after last octet */ -}; - -/* - * Octet string (not null terminated) - */ -struct asn1_octstr -{ - unsigned char *data; - unsigned int len; -}; - -static void asn1_open(struct asn1_ctx *ctx, - unsigned char *buf, - unsigned int len) -{ - ctx->begin = buf; - ctx->end = buf + len; - ctx->pointer = buf; - ctx->error = ASN1_ERR_NOERROR; -} - -static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch) -{ - if (ctx->pointer >= ctx->end) { - ctx->error = ASN1_ERR_DEC_EMPTY; - return 0; - } - *ch = *(ctx->pointer)++; - return 1; -} - -static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) -{ - unsigned char ch; - - *tag = 0; - - do - { - if (!asn1_octet_decode(ctx, &ch)) - return 0; - *tag <<= 7; - *tag |= ch & 0x7F; - } while ((ch & 0x80) == 0x80); - return 1; -} - -static unsigned char asn1_id_decode(struct asn1_ctx *ctx, - unsigned int *cls, - unsigned int *con, - unsigned int *tag) -{ - unsigned char ch; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *cls = (ch & 0xC0) >> 6; - *con = (ch & 0x20) >> 5; - *tag = (ch & 0x1F); - - if (*tag == 0x1F) { - if (!asn1_tag_decode(ctx, tag)) - return 0; - } - return 1; -} - -static unsigned char asn1_length_decode(struct asn1_ctx *ctx, - unsigned int *def, - unsigned int *len) -{ - unsigned char ch, cnt; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - if (ch == 0x80) - *def = 0; - else { - *def = 1; - - if (ch < 0x80) - *len = ch; - else { - cnt = ch & 0x7F; - *len = 0; - - while (cnt > 0) { - if (!asn1_octet_decode(ctx, &ch)) - return 0; - *len <<= 8; - *len |= ch; - cnt--; - } - } - } - - /* don't trust len bigger than ctx buffer */ - if (*len > ctx->end - ctx->pointer) - return 0; - - return 1; -} - -static unsigned char asn1_header_decode(struct asn1_ctx *ctx, - unsigned char **eoc, - unsigned int *cls, - unsigned int *con, - unsigned int *tag) -{ - unsigned int def, len; - - if (!asn1_id_decode(ctx, cls, con, tag)) - return 0; - - def = len = 0; - if (!asn1_length_decode(ctx, &def, &len)) - return 0; - - /* primitive shall be definite, indefinite shall be constructed */ - if (*con == ASN1_PRI && !def) - return 0; - - if (def) - *eoc = ctx->pointer + len; - else - *eoc = NULL; - return 1; -} - -static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc) -{ - unsigned char ch; - - if (eoc == NULL) { - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - if (ch != 0x00) { - ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; - return 0; - } - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - if (ch != 0x00) { - ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; - return 0; - } - return 1; - } else { - if (ctx->pointer != eoc) { - ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH; - return 0; - } - return 1; - } -} - -static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc) -{ - ctx->pointer = eoc; - return 1; -} - -static unsigned char asn1_long_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - long *integer) -{ - unsigned char ch; - unsigned int len; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer = (signed char) ch; - len = 1; - - while (ctx->pointer < eoc) { - if (++len > sizeof (long)) { - ctx->error = ASN1_ERR_DEC_BADVALUE; - return 0; - } - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer <<= 8; - *integer |= ch; - } - return 1; -} - -static unsigned char asn1_uint_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned int *integer) -{ - unsigned char ch; - unsigned int len; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer = ch; - if (ch == 0) len = 0; - else len = 1; - - while (ctx->pointer < eoc) { - if (++len > sizeof (unsigned int)) { - ctx->error = ASN1_ERR_DEC_BADVALUE; - return 0; - } - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer <<= 8; - *integer |= ch; - } - return 1; -} - -static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned long *integer) -{ - unsigned char ch; - unsigned int len; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer = ch; - if (ch == 0) len = 0; - else len = 1; - - while (ctx->pointer < eoc) { - if (++len > sizeof (unsigned long)) { - ctx->error = ASN1_ERR_DEC_BADVALUE; - return 0; - } - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer <<= 8; - *integer |= ch; - } - return 1; -} - -static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned char **octets, - unsigned int *len) -{ - unsigned char *ptr; - - *len = 0; - - *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); - if (*octets == NULL) - return 0; - - ptr = *octets; - while (ctx->pointer < eoc) { - if (!asn1_octet_decode(ctx, ptr++)) { - kfree(*octets); - *octets = NULL; - return 0; - } - (*len)++; - } - return 1; -} - -static unsigned char asn1_subid_decode(struct asn1_ctx *ctx, - unsigned long *subid) -{ - unsigned char ch; - - *subid = 0; - - do { - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *subid <<= 7; - *subid |= ch & 0x7F; - } while ((ch & 0x80) == 0x80); - return 1; -} - -static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned long **oid, - unsigned int *len) -{ - unsigned long subid; - unsigned long *optr; - size_t size; - - size = eoc - ctx->pointer + 1; - - /* first subid actually encodes first two subids */ - if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) - return 0; - - *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); - if (*oid == NULL) - return 0; - - optr = *oid; - - if (!asn1_subid_decode(ctx, &subid)) { - kfree(*oid); - *oid = NULL; - return 0; - } - - if (subid < 40) { - optr[0] = 0; - optr[1] = subid; - } else if (subid < 80) { - optr[0] = 1; - optr[1] = subid - 40; - } else { - optr[0] = 2; - optr[1] = subid - 80; - } - - *len = 2; - optr += 2; - - while (ctx->pointer < eoc) { - if (++(*len) > size) { - ctx->error = ASN1_ERR_DEC_BADVALUE; - kfree(*oid); - *oid = NULL; - return 0; - } - - if (!asn1_subid_decode(ctx, optr++)) { - kfree(*oid); - *oid = NULL; - return 0; - } - } - return 1; -} - -/***************************************************************************** - * - * SNMP decoding routines (gxsnmp author Dirk Wisse) - * - *****************************************************************************/ - -/* SNMP Versions */ -#define SNMP_V1 0 -#define SNMP_V2C 1 -#define SNMP_V2 2 -#define SNMP_V3 3 - -/* Default Sizes */ -#define SNMP_SIZE_COMM 256 -#define SNMP_SIZE_OBJECTID 128 -#define SNMP_SIZE_BUFCHR 256 -#define SNMP_SIZE_BUFINT 128 -#define SNMP_SIZE_SMALLOBJECTID 16 - -/* Requests */ -#define SNMP_PDU_GET 0 -#define SNMP_PDU_NEXT 1 -#define SNMP_PDU_RESPONSE 2 -#define SNMP_PDU_SET 3 -#define SNMP_PDU_TRAP1 4 -#define SNMP_PDU_BULK 5 -#define SNMP_PDU_INFORM 6 -#define SNMP_PDU_TRAP2 7 - -/* Errors */ -#define SNMP_NOERROR 0 -#define SNMP_TOOBIG 1 -#define SNMP_NOSUCHNAME 2 -#define SNMP_BADVALUE 3 -#define SNMP_READONLY 4 -#define SNMP_GENERROR 5 -#define SNMP_NOACCESS 6 -#define SNMP_WRONGTYPE 7 -#define SNMP_WRONGLENGTH 8 -#define SNMP_WRONGENCODING 9 -#define SNMP_WRONGVALUE 10 -#define SNMP_NOCREATION 11 -#define SNMP_INCONSISTENTVALUE 12 -#define SNMP_RESOURCEUNAVAILABLE 13 -#define SNMP_COMMITFAILED 14 -#define SNMP_UNDOFAILED 15 -#define SNMP_AUTHORIZATIONERROR 16 -#define SNMP_NOTWRITABLE 17 -#define SNMP_INCONSISTENTNAME 18 - -/* General SNMP V1 Traps */ -#define SNMP_TRAP_COLDSTART 0 -#define SNMP_TRAP_WARMSTART 1 -#define SNMP_TRAP_LINKDOWN 2 -#define SNMP_TRAP_LINKUP 3 -#define SNMP_TRAP_AUTFAILURE 4 -#define SNMP_TRAP_EQPNEIGHBORLOSS 5 -#define SNMP_TRAP_ENTSPECIFIC 6 - -/* SNMPv1 Types */ -#define SNMP_NULL 0 -#define SNMP_INTEGER 1 /* l */ -#define SNMP_OCTETSTR 2 /* c */ -#define SNMP_DISPLAYSTR 2 /* c */ -#define SNMP_OBJECTID 3 /* ul */ -#define SNMP_IPADDR 4 /* uc */ -#define SNMP_COUNTER 5 /* ul */ -#define SNMP_GAUGE 6 /* ul */ -#define SNMP_TIMETICKS 7 /* ul */ -#define SNMP_OPAQUE 8 /* c */ - -/* Additional SNMPv2 Types */ -#define SNMP_UINTEGER 5 /* ul */ -#define SNMP_BITSTR 9 /* uc */ -#define SNMP_NSAP 10 /* uc */ -#define SNMP_COUNTER64 11 /* ul */ -#define SNMP_NOSUCHOBJECT 12 -#define SNMP_NOSUCHINSTANCE 13 -#define SNMP_ENDOFMIBVIEW 14 - -union snmp_syntax -{ - unsigned char uc[0]; /* 8 bit unsigned */ - char c[0]; /* 8 bit signed */ - unsigned long ul[0]; /* 32 bit unsigned */ - long l[0]; /* 32 bit signed */ -}; - -struct snmp_object -{ - unsigned long *id; - unsigned int id_len; - unsigned short type; - unsigned int syntax_len; - union snmp_syntax syntax; -}; - -struct snmp_request -{ - unsigned long id; - unsigned int error_status; - unsigned int error_index; -}; - -struct snmp_v1_trap -{ - unsigned long *id; - unsigned int id_len; - unsigned long ip_address; /* pointer */ - unsigned int general; - unsigned int specific; - unsigned long time; -}; - -/* SNMP types */ -#define SNMP_IPA 0 -#define SNMP_CNT 1 -#define SNMP_GGE 2 -#define SNMP_TIT 3 -#define SNMP_OPQ 4 -#define SNMP_C64 6 - -/* SNMP errors */ -#define SERR_NSO 0 -#define SERR_NSI 1 -#define SERR_EOM 2 - -static inline void mangle_address(unsigned char *begin, - unsigned char *addr, - const struct oct1_map *map, - __sum16 *check); -struct snmp_cnv -{ - unsigned int class; - unsigned int tag; - int syntax; -}; - -static const struct snmp_cnv snmp_conv[] = { - {ASN1_UNI, ASN1_NUL, SNMP_NULL}, - {ASN1_UNI, ASN1_INT, SNMP_INTEGER}, - {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR}, - {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR}, - {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID}, - {ASN1_APL, SNMP_IPA, SNMP_IPADDR}, - {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */ - {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */ - {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS}, - {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE}, - - /* SNMPv2 data types and errors */ - {ASN1_UNI, ASN1_BTS, SNMP_BITSTR}, - {ASN1_APL, SNMP_C64, SNMP_COUNTER64}, - {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT}, - {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE}, - {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW}, - {0, 0, -1} -}; - -static unsigned char snmp_tag_cls2syntax(unsigned int tag, - unsigned int cls, - unsigned short *syntax) -{ - const struct snmp_cnv *cnv; - - cnv = snmp_conv; - - while (cnv->syntax != -1) { - if (cnv->tag == tag && cnv->class == cls) { - *syntax = cnv->syntax; - return 1; - } - cnv++; - } - return 0; -} - -static unsigned char snmp_object_decode(struct asn1_ctx *ctx, - struct snmp_object **obj) -{ - unsigned int cls, con, tag, len, idlen; - unsigned short type; - unsigned char *eoc, *end, *p; - unsigned long *lp, *id; - unsigned long ul; - long l; - - *obj = NULL; - id = NULL; - - if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) - return 0; - - if (!asn1_oid_decode(ctx, end, &id, &idlen)) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) { - kfree(id); - return 0; - } - - if (con != ASN1_PRI) { - kfree(id); - return 0; - } - - type = 0; - if (!snmp_tag_cls2syntax(tag, cls, &type)) { - kfree(id); - return 0; - } - - l = 0; - switch (type) { - case SNMP_INTEGER: - len = sizeof(long); - if (!asn1_long_decode(ctx, end, &l)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - return 0; - } - (*obj)->syntax.l[0] = l; - break; - case SNMP_OCTETSTR: - case SNMP_OPAQUE: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(p); - kfree(id); - return 0; - } - memcpy((*obj)->syntax.c, p, len); - kfree(p); - break; - case SNMP_NULL: - case SNMP_NOSUCHOBJECT: - case SNMP_NOSUCHINSTANCE: - case SNMP_ENDOFMIBVIEW: - len = 0; - *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - return 0; - } - if (!asn1_null_decode(ctx, end)) { - kfree(id); - kfree(*obj); - *obj = NULL; - return 0; - } - break; - case SNMP_OBJECTID: - if (!asn1_oid_decode(ctx, end, &lp, &len)) { - kfree(id); - return 0; - } - len *= sizeof(unsigned long); - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(lp); - kfree(id); - return 0; - } - memcpy((*obj)->syntax.ul, lp, len); - kfree(lp); - break; - case SNMP_IPADDR: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - if (len != 4) { - kfree(p); - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(p); - kfree(id); - return 0; - } - memcpy((*obj)->syntax.uc, p, len); - kfree(p); - break; - case SNMP_COUNTER: - case SNMP_GAUGE: - case SNMP_TIMETICKS: - len = sizeof(unsigned long); - if (!asn1_ulong_decode(ctx, end, &ul)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - return 0; - } - (*obj)->syntax.ul[0] = ul; - break; - default: - kfree(id); - return 0; - } - - (*obj)->syntax_len = len; - (*obj)->type = type; - (*obj)->id = id; - (*obj)->id_len = idlen; - - if (!asn1_eoc_decode(ctx, eoc)) { - kfree(id); - kfree(*obj); - *obj = NULL; - return 0; - } - return 1; -} - -static unsigned char noinline_for_stack -snmp_request_decode(struct asn1_ctx *ctx, struct snmp_request *request) -{ - unsigned int cls, con, tag; - unsigned char *end; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - return 0; - - if (!asn1_ulong_decode(ctx, end, &request->id)) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - return 0; - - if (!asn1_uint_decode(ctx, end, &request->error_status)) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - return 0; - - if (!asn1_uint_decode(ctx, end, &request->error_index)) - return 0; - - return 1; -} - -/* - * Fast checksum update for possibly oddly-aligned UDP byte, from the - * code example in the draft. - */ -static void fast_csum(__sum16 *csum, - const unsigned char *optr, - const unsigned char *nptr, - int offset) -{ - unsigned char s[4]; - - if (offset & 1) { - s[0] = ~0; - s[1] = ~*optr; - s[2] = 0; - s[3] = *nptr; - } else { - s[0] = ~*optr; - s[1] = ~0; - s[2] = *nptr; - s[3] = 0; - } - - *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); -} - -/* - * Mangle IP address. - * - begin points to the start of the snmp messgae - * - addr points to the start of the address - */ -static inline void mangle_address(unsigned char *begin, - unsigned char *addr, - const struct oct1_map *map, - __sum16 *check) -{ - if (map->from == NOCT1(addr)) { - u_int32_t old; - - if (debug) - memcpy(&old, addr, sizeof(old)); - - *addr = map->to; - - /* Update UDP checksum if being used */ - if (*check) { - fast_csum(check, - &map->from, &map->to, addr - begin); - - } - - if (debug) - printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n", - &old, addr); - } -} - -static unsigned char noinline_for_stack -snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap, - const struct oct1_map *map, - __sum16 *check) -{ - unsigned int cls, con, tag, len; - unsigned char *end; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) - return 0; - - if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len)) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - goto err_id_free; - - if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) || - (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS))) - goto err_id_free; - - if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len)) - goto err_id_free; - - /* IPv4 only */ - if (len != 4) - goto err_addr_free; - - mangle_address(ctx->begin, ctx->pointer - 4, map, check); - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - goto err_addr_free; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - goto err_addr_free; - - if (!asn1_uint_decode(ctx, end, &trap->general)) - goto err_addr_free; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - goto err_addr_free; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - goto err_addr_free; - - if (!asn1_uint_decode(ctx, end, &trap->specific)) - goto err_addr_free; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - goto err_addr_free; - - if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) || - (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT))) - goto err_addr_free; - - if (!asn1_ulong_decode(ctx, end, &trap->time)) - goto err_addr_free; - - return 1; - -err_addr_free: - kfree((unsigned long *)trap->ip_address); - -err_id_free: - kfree(trap->id); - - return 0; -} - -/***************************************************************************** - * - * Misc. routines - * - *****************************************************************************/ - -/* - * Parse and mangle SNMP message according to mapping. - * (And this is the fucking 'basic' method). - */ -static int snmp_parse_mangle(unsigned char *msg, - u_int16_t len, - const struct oct1_map *map, - __sum16 *check) -{ - unsigned char *eoc, *end; - unsigned int cls, con, tag, vers, pdutype; - struct asn1_ctx ctx; - struct asn1_octstr comm; - struct snmp_object *obj; - - if (debug > 1) - print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1, - msg, len, 0); - - asn1_open(&ctx, msg, len); - - /* - * Start of SNMP message. - */ - if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) - return 0; - if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) - return 0; - - /* - * Version 1 or 2 handled. - */ - if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag)) - return 0; - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - return 0; - if (!asn1_uint_decode (&ctx, end, &vers)) - return 0; - if (debug > 1) - pr_debug("bsalg: snmp version: %u\n", vers + 1); - if (vers > 1) - return 1; - - /* - * Community. - */ - if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag)) - return 0; - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS) - return 0; - if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len)) - return 0; - if (debug > 1) { - unsigned int i; - - pr_debug("bsalg: community: "); - for (i = 0; i < comm.len; i++) - pr_cont("%c", comm.data[i]); - pr_cont("\n"); - } - kfree(comm.data); - - /* - * PDU type - */ - if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype)) - return 0; - if (cls != ASN1_CTX || con != ASN1_CON) - return 0; - if (debug > 1) { - static const unsigned char *const pdus[] = { - [SNMP_PDU_GET] = "get", - [SNMP_PDU_NEXT] = "get-next", - [SNMP_PDU_RESPONSE] = "response", - [SNMP_PDU_SET] = "set", - [SNMP_PDU_TRAP1] = "trapv1", - [SNMP_PDU_BULK] = "bulk", - [SNMP_PDU_INFORM] = "inform", - [SNMP_PDU_TRAP2] = "trapv2" - }; - - if (pdutype > SNMP_PDU_TRAP2) - pr_debug("bsalg: bad pdu type %u\n", pdutype); - else - pr_debug("bsalg: pdu: %s\n", pdus[pdutype]); - } - if (pdutype != SNMP_PDU_RESPONSE && - pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2) - return 1; - - /* - * Request header or v1 trap - */ - if (pdutype == SNMP_PDU_TRAP1) { - struct snmp_v1_trap trap; - unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check); - - if (ret) { - kfree(trap.id); - kfree((unsigned long *)trap.ip_address); - } else - return ret; - - } else { - struct snmp_request req; - - if (!snmp_request_decode(&ctx, &req)) - return 0; - - if (debug > 1) - pr_debug("bsalg: request: id=0x%lx error_status=%u " - "error_index=%u\n", req.id, req.error_status, - req.error_index); - } - - /* - * Loop through objects, look for IP addresses to mangle. - */ - if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) - return 0; - - while (!asn1_eoc_decode(&ctx, eoc)) { - unsigned int i; - - if (!snmp_object_decode(&ctx, &obj)) { - if (obj) { - kfree(obj->id); - kfree(obj); - } - return 0; - } - - if (debug > 1) { - pr_debug("bsalg: object: "); - for (i = 0; i < obj->id_len; i++) { - if (i > 0) - pr_cont("."); - pr_cont("%lu", obj->id[i]); - } - pr_cont(": type=%u\n", obj->type); - - } - - if (obj->type == SNMP_IPADDR) - mangle_address(ctx.begin, ctx.pointer - 4, map, check); - - kfree(obj->id); - kfree(obj); - } - - if (!asn1_eoc_decode(&ctx, eoc)) - return 0; - - return 1; -} - -/***************************************************************************** - * - * NAT routines. - * - *****************************************************************************/ - -/* - * SNMP translation routine. - */ -static int snmp_translate(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - struct sk_buff *skb) -{ - struct iphdr *iph = ip_hdr(skb); - struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); - u_int16_t udplen = ntohs(udph->len); - u_int16_t paylen = udplen - sizeof(struct udphdr); - int dir = CTINFO2DIR(ctinfo); - struct oct1_map map; - - /* - * Determine mappping for application layer addresses based - * on NAT manipulations for the packet. - */ - if (dir == IP_CT_DIR_ORIGINAL) { - /* SNAT traps */ - map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip); - map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); - } else { - /* DNAT replies */ - map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip); - map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip); - } - - if (map.from == map.to) - return NF_ACCEPT; - - if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), - paylen, &map, &udph->check)) { - net_warn_ratelimited("bsalg: parser failed\n"); - return NF_DROP; - } - return NF_ACCEPT; -} - -/* We don't actually set up expectations, just adjust internal IP - * addresses if this is being NATted */ -static int help(struct sk_buff *skb, unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - int dir = CTINFO2DIR(ctinfo); - unsigned int ret; - const struct iphdr *iph = ip_hdr(skb); - const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); - - /* SNMP replies and originating SNMP traps get mangled */ - if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) - return NF_ACCEPT; - if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) - return NF_ACCEPT; - - /* No NAT? */ - if (!(ct->status & IPS_NAT_MASK)) - return NF_ACCEPT; - - /* - * Make sure the packet length is ok. So far, we were only guaranteed - * to have a valid length IP header plus 8 bytes, which means we have - * enough room for a UDP header. Just verify the UDP length field so we - * can mess around with the payload. - */ - if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { - net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", - &iph->saddr, &iph->daddr); - return NF_DROP; - } - - if (!skb_make_writable(skb, skb->len)) - return NF_DROP; - - spin_lock_bh(&snmp_lock); - ret = snmp_translate(ct, ctinfo, skb); - spin_unlock_bh(&snmp_lock); - return ret; -} - -static const struct nf_conntrack_expect_policy snmp_exp_policy = { - .max_expected = 0, - .timeout = 180, -}; - -static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp_trap", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - -/***************************************************************************** - * - * Module stuff. - * - *****************************************************************************/ - -static int __init nf_nat_snmp_basic_init(void) -{ - BUG_ON(nf_nat_snmp_hook != NULL); - RCU_INIT_POINTER(nf_nat_snmp_hook, help); - - return nf_conntrack_helper_register(&snmp_trap_helper); -} - -static void __exit nf_nat_snmp_basic_fini(void) -{ - RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); - synchronize_rcu(); - nf_conntrack_helper_unregister(&snmp_trap_helper); -} - -module_init(nf_nat_snmp_basic_init); -module_exit(nf_nat_snmp_basic_fini); - -module_param(debug, int, 0600); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c new file mode 100644 index 000000000000..b6e277093e7e --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -0,0 +1,235 @@ +/* + * nf_nat_snmp_basic.c + * + * Basic SNMP Application Layer Gateway + * + * This IP NAT module is intended for use with SNMP network + * discovery and monitoring applications where target networks use + * conflicting private address realms. + * + * Static NAT is used to remap the networks from the view of the network + * management system at the IP layer, and this module remaps some application + * layer addresses to match. + * + * The simplest form of ALG is performed, where only tagged IP addresses + * are modified. The module does not need to be MIB aware and only scans + * messages at the ASN.1/BER level. + * + * Currently, only SNMPv1 and SNMPv2 are supported. + * + * More information on ALG and associated issues can be found in + * RFC 2962 + * + * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory + * McLean & Jochen Friedrich, stripped down for use in the kernel. + * + * Copyright (c) 2000 RP Internet (www.rpi.net.au). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Author: James Morris <jmorris@intercode.com.au> + * + * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net> + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <net/checksum.h> +#include <net/udp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_snmp.h> +#include "nf_nat_snmp_basic-asn1.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); +MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); +MODULE_ALIAS("ip_nat_snmp_basic"); + +#define SNMP_PORT 161 +#define SNMP_TRAP_PORT 162 + +static DEFINE_SPINLOCK(snmp_lock); + +struct snmp_ctx { + unsigned char *begin; + __sum16 *check; + __be32 from; + __be32 to; +}; + +static void fast_csum(struct snmp_ctx *ctx, unsigned char offset) +{ + unsigned char s[12] = {0,}; + int size; + + if (offset & 1) { + memcpy(&s[1], &ctx->from, 4); + memcpy(&s[7], &ctx->to, 4); + s[0] = ~0; + s[1] = ~s[1]; + s[2] = ~s[2]; + s[3] = ~s[3]; + s[4] = ~s[4]; + s[5] = ~0; + size = 12; + } else { + memcpy(&s[0], &ctx->from, 4); + memcpy(&s[4], &ctx->to, 4); + s[0] = ~s[0]; + s[1] = ~s[1]; + s[2] = ~s[2]; + s[3] = ~s[3]; + size = 8; + } + *ctx->check = csum_fold(csum_partial(s, size, + ~csum_unfold(*ctx->check))); +} + +int snmp_version(void *context, size_t hdrlen, unsigned char tag, + const void *data, size_t datalen) +{ + if (*(unsigned char *)data > 1) + return -ENOTSUPP; + return 1; +} + +int snmp_helper(void *context, size_t hdrlen, unsigned char tag, + const void *data, size_t datalen) +{ + struct snmp_ctx *ctx = (struct snmp_ctx *)context; + __be32 *pdata = (__be32 *)data; + + if (*pdata == ctx->from) { + pr_debug("%s: %pI4 to %pI4\n", __func__, + (void *)&ctx->from, (void *)&ctx->to); + + if (*ctx->check) + fast_csum(ctx, (unsigned char *)data - ctx->begin); + *pdata = ctx->to; + } + + return 1; +} + +static int snmp_translate(struct nf_conn *ct, int dir, struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); + u16 datalen = ntohs(udph->len) - sizeof(struct udphdr); + char *data = (unsigned char *)udph + sizeof(struct udphdr); + struct snmp_ctx ctx; + int ret; + + if (dir == IP_CT_DIR_ORIGINAL) { + ctx.from = ct->tuplehash[dir].tuple.src.u3.ip; + ctx.to = ct->tuplehash[!dir].tuple.dst.u3.ip; + } else { + ctx.from = ct->tuplehash[!dir].tuple.src.u3.ip; + ctx.to = ct->tuplehash[dir].tuple.dst.u3.ip; + } + + if (ctx.from == ctx.to) + return NF_ACCEPT; + + ctx.begin = (unsigned char *)udph + sizeof(struct udphdr); + ctx.check = &udph->check; + ret = asn1_ber_decoder(&nf_nat_snmp_basic_decoder, &ctx, data, datalen); + if (ret < 0) { + nf_ct_helper_log(skb, ct, "parser failed\n"); + return NF_DROP; + } + + return NF_ACCEPT; +} + +/* We don't actually set up expectations, just adjust internal IP + * addresses if this is being NATted + */ +static int help(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + int dir = CTINFO2DIR(ctinfo); + unsigned int ret; + const struct iphdr *iph = ip_hdr(skb); + const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); + + /* SNMP replies and originating SNMP traps get mangled */ + if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) + return NF_ACCEPT; + if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + /* No NAT? */ + if (!(ct->status & IPS_NAT_MASK)) + return NF_ACCEPT; + + /* Make sure the packet length is ok. So far, we were only guaranteed + * to have a valid length IP header plus 8 bytes, which means we have + * enough room for a UDP header. Just verify the UDP length field so we + * can mess around with the payload. + */ + if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { + nf_ct_helper_log(skb, ct, "dropping malformed packet\n"); + return NF_DROP; + } + + if (!skb_make_writable(skb, skb->len)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); + return NF_DROP; + } + + spin_lock_bh(&snmp_lock); + ret = snmp_translate(ct, dir, skb); + spin_unlock_bh(&snmp_lock); + return ret; +} + +static const struct nf_conntrack_expect_policy snmp_exp_policy = { + .max_expected = 0, + .timeout = 180, +}; + +static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { + .me = THIS_MODULE, + .help = help, + .expect_policy = &snmp_exp_policy, + .name = "snmp_trap", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, +}; + +static int __init nf_nat_snmp_basic_init(void) +{ + BUG_ON(nf_nat_snmp_hook != NULL); + RCU_INIT_POINTER(nf_nat_snmp_hook, help); + + return nf_conntrack_helper_register(&snmp_trap_helper); +} + +static void __exit nf_nat_snmp_basic_fini(void) +{ + RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); + nf_conntrack_helper_unregister(&snmp_trap_helper); +} + +module_init(nf_nat_snmp_basic_init); +module_exit(nf_nat_snmp_basic_fini); diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 4bbc273b45e8..036c074736b0 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -21,51 +21,12 @@ nft_do_chain_arp(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_unspec(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb); return nft_do_chain(&pkt, priv); } -static struct nft_af_info nft_af_arp __read_mostly = { - .family = NFPROTO_ARP, - .nhooks = NF_ARP_NUMHOOKS, - .owner = THIS_MODULE, - .nops = 1, - .hooks = { - [NF_ARP_IN] = nft_do_chain_arp, - [NF_ARP_OUT] = nft_do_chain_arp, - [NF_ARP_FORWARD] = nft_do_chain_arp, - }, -}; - -static int nf_tables_arp_init_net(struct net *net) -{ - net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); - if (net->nft.arp== NULL) - return -ENOMEM; - - memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp)); - - if (nft_register_afinfo(net, net->nft.arp) < 0) - goto err; - - return 0; -err: - kfree(net->nft.arp); - return -ENOMEM; -} - -static void nf_tables_arp_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.arp); - kfree(net->nft.arp); -} - -static struct pernet_operations nf_tables_arp_net_ops = { - .init = nf_tables_arp_init_net, - .exit = nf_tables_arp_exit_net, -}; - static const struct nf_chain_type filter_arp = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, @@ -73,26 +34,19 @@ static const struct nf_chain_type filter_arp = { .owner = THIS_MODULE, .hook_mask = (1 << NF_ARP_IN) | (1 << NF_ARP_OUT), + .hooks = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + }, }; static int __init nf_tables_arp_init(void) { - int ret; - - ret = nft_register_chain_type(&filter_arp); - if (ret < 0) - return ret; - - ret = register_pernet_subsys(&nf_tables_arp_net_ops); - if (ret < 0) - nft_unregister_chain_type(&filter_arp); - - return ret; + return nft_register_chain_type(&filter_arp); } static void __exit nf_tables_arp_exit(void) { - unregister_pernet_subsys(&nf_tables_arp_net_ops); nft_unregister_chain_type(&filter_arp); } @@ -101,4 +55,4 @@ module_exit(nf_tables_arp_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */ +MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */ diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 2840a29b2e04..96f955496d5f 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -24,69 +24,12 @@ static unsigned int nft_do_chain_ipv4(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv4_output(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - if (unlikely(skb->len < sizeof(struct iphdr) || - ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { - if (net_ratelimit()) - pr_info("nf_tables_ipv4: ignoring short SOCK_RAW " - "packet\n"); - return NF_ACCEPT; - } - - return nft_do_chain_ipv4(priv, skb, state); -} - -struct nft_af_info nft_af_ipv4 __read_mostly = { - .family = NFPROTO_IPV4, - .nhooks = NF_INET_NUMHOOKS, - .owner = THIS_MODULE, - .nops = 1, - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, - [NF_INET_LOCAL_OUT] = nft_ipv4_output, - [NF_INET_FORWARD] = nft_do_chain_ipv4, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, - }, -}; -EXPORT_SYMBOL_GPL(nft_af_ipv4); - -static int nf_tables_ipv4_init_net(struct net *net) -{ - net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); - if (net->nft.ipv4 == NULL) - return -ENOMEM; - - memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4)); - - if (nft_register_afinfo(net, net->nft.ipv4) < 0) - goto err; - - return 0; -err: - kfree(net->nft.ipv4); - return -ENOMEM; -} - -static void nf_tables_ipv4_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.ipv4); - kfree(net->nft.ipv4); -} - -static struct pernet_operations nf_tables_ipv4_net_ops = { - .init = nf_tables_ipv4_init_net, - .exit = nf_tables_ipv4_exit_net, -}; - static const struct nf_chain_type filter_ipv4 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, @@ -97,26 +40,22 @@ static const struct nf_chain_type filter_ipv4 = { (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, + [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, + }, }; static int __init nf_tables_ipv4_init(void) { - int ret; - - ret = nft_register_chain_type(&filter_ipv4); - if (ret < 0) - return ret; - - ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); - if (ret < 0) - nft_unregister_chain_type(&filter_ipv4); - - return ret; + return nft_register_chain_type(&filter_ipv4); } static void __exit nf_tables_ipv4_exit(void) { - unregister_pernet_subsys(&nf_tables_ipv4_net_ops); nft_unregister_chain_type(&filter_ipv4); } @@ -125,4 +64,4 @@ module_exit(nf_tables_ipv4_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_FAMILY(AF_INET); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter"); diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index f5c66a7a4bf2..f2a490981594 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 30493beb611a..d965c225b9f6 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -33,12 +33,8 @@ static unsigned int nf_route_table_hook(void *priv, const struct iphdr *iph; int err; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - - nft_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); mark = skb->mark; iph = ip_hdr(skb); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f37c4727861..dc5edc8f7564 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -83,7 +83,6 @@ static int sockstat_seq_open(struct inode *inode, struct file *file) } static const struct file_operations sockstat_seq_fops = { - .owner = THIS_MODULE, .open = sockstat_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -467,7 +466,6 @@ static int snmp_seq_open(struct inode *inode, struct file *file) } static const struct file_operations snmp_seq_fops = { - .owner = THIS_MODULE, .open = snmp_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -515,7 +513,6 @@ static int netstat_seq_open(struct inode *inode, struct file *file) } static const struct file_operations netstat_seq_fops = { - .owner = THIS_MODULE, .open = netstat_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5e570aa9e43b..7c509697ebc7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -617,8 +617,21 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; - } else if (!ipc.oif) + } else if (!ipc.oif) { ipc.oif = inet->uc_index; + } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { + /* oif is set, packet is to local broadcast and + * and uc_index is set. oif is most likely set + * by sk_bound_dev_if. If uc_index != oif check if the + * oif is an L3 master and uc_index is an L3 slave. + * If so, we want to allow the send using the uc_index. + */ + if (ipc.oif != inet->uc_index && + ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), + inet->uc_index)) { + ipc.oif = inet->uc_index; + } + } flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, @@ -1119,7 +1132,6 @@ static int raw_v4_seq_open(struct inode *inode, struct file *file) } static const struct file_operations raw_seq_fops = { - .owner = THIS_MODULE, .open = raw_v4_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4e153b23bcec..49cc1c1df1ba 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -240,7 +240,6 @@ static int rt_cache_seq_open(struct inode *inode, struct file *file) } static const struct file_operations rt_cache_seq_fops = { - .owner = THIS_MODULE, .open = rt_cache_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -331,7 +330,6 @@ static int rt_cpu_seq_open(struct inode *inode, struct file *file) } static const struct file_operations rt_cpu_seq_fops = { - .owner = THIS_MODULE, .open = rt_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -369,7 +367,6 @@ static int rt_acct_proc_open(struct inode *inode, struct file *file) } static const struct file_operations rt_acct_proc_fops = { - .owner = THIS_MODULE, .open = rt_acct_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -1106,7 +1103,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) new = true; } - __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); + __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu); if (!dst_check(&rt->dst, 0)) { if (new) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1b38b4282cc9..c059aa7df0a9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -283,8 +283,6 @@ #include <asm/ioctls.h> #include <net/busy_poll.h> -#include <trace/events/tcp.h> - struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -465,7 +463,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op) tcp_mtup_init(sk); icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); - tcp_call_bpf(sk, bpf_op); + tcp_call_bpf(sk, bpf_op, 0, NULL); tcp_init_congestion_control(sk); tcp_init_buffer_space(sk); } @@ -500,11 +498,9 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) const struct tcp_sock *tp = tcp_sk(sk); int state; - sock_rps_record_flow(sk); - sock_poll_wait(file, sk_sleep(sk), wait); - state = sk_state_load(sk); + state = inet_sk_state_load(sk); if (state == TCP_LISTEN) return inet_csk_listen_poll(sk); @@ -1106,12 +1102,15 @@ static int linear_payload_sz(bool first_skb) return 0; } -static int select_size(const struct sock *sk, bool sg, bool first_skb) +static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc) { const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { + if (zc) + return 0; + if (sk_can_gso(sk)) { tmp = linear_payload_sz(first_skb); } else { @@ -1188,7 +1187,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; bool process_backlog = false; - bool sg; + bool sg, zc = false; long timeo; flags = msg->msg_flags; @@ -1206,7 +1205,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) goto out_err; } - if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG)) + zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG; + if (!zc) uarg->zerocopy = 0; } @@ -1283,6 +1283,7 @@ restart: if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; + int linear; new_segment: /* Allocate new segment. If the interface is SG, @@ -1296,9 +1297,8 @@ new_segment: goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - skb = sk_stream_alloc_skb(sk, - select_size(sk, sg, first_skb), - sk->sk_allocation, + linear = select_size(sk, sg, first_skb, zc); + skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation, first_skb); if (!skb) goto wait_for_memory; @@ -1327,13 +1327,13 @@ new_segment: copy = msg_data_left(msg); /* Where to copy to? */ - if (skb_availroom(skb) > 0) { + if (skb_availroom(skb) > 0 && !zc) { /* We have some space in skb head. Superb! */ copy = min_t(int, copy, skb_availroom(skb)); err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); if (err) goto do_fault; - } else if (!uarg || !uarg->zerocopy) { + } else if (!zc) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@ -1373,8 +1373,10 @@ new_segment: pfrag->offset += copy; } else { err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); - if (err == -EMSGSIZE || err == -EEXIST) + if (err == -EMSGSIZE || err == -EEXIST) { + tcp_mark_push(tp, skb); goto new_segment; + } if (err < 0) goto do_error; copy = err; @@ -1731,8 +1733,8 @@ static void tcp_update_recv_tstamps(struct sk_buff *skb, } /* Similar to __sock_recv_timestamp, but does not require an skb */ -void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, - struct scm_timestamping *tss) +static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, + struct scm_timestamping *tss) { struct timeval tv; bool has_timestamping = false; @@ -2040,7 +2042,29 @@ void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->sk_state; - trace_tcp_set_state(sk, oldstate, state); + /* We defined a new enum for TCP states that are exported in BPF + * so as not force the internal TCP states to be frozen. The + * following checks will detect if an internal state value ever + * differs from the BPF value. If this ever happens, then we will + * need to remap the internal value to the BPF value before calling + * tcp_call_bpf_2arg. + */ + BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED); + BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT); + BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV); + BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1); + BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2); + BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT); + BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE); + BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT); + BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK); + BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN); + BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING); + BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV); + BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES); + + if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG)) + tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state); switch (state) { case TCP_ESTABLISHED: @@ -2065,7 +2089,7 @@ void tcp_set_state(struct sock *sk, int state) /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ - sk_state_store(sk, state); + inet_sk_state_store(sk, state); #ifdef STATE_TRACE SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); @@ -2434,6 +2458,12 @@ int tcp_disconnect(struct sock *sk, int flags) WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; + } + sk->sk_error_report(sk); return err; } @@ -2923,7 +2953,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (sk->sk_type != SOCK_STREAM) return; - info->tcpi_state = sk_state_load(sk); + info->tcpi_state = inet_sk_state_load(sk); /* Report meaningful fields for all TCP states, including listeners */ rate = READ_ONCE(sk->sk_pacing_rate); @@ -3581,6 +3611,9 @@ void __init tcp_init(void) percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); inet_hashinfo_init(&tcp_hashinfo); + inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", + thash_entries, 21, /* one slot per 2 MB*/ + 0, 64 * 1024); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 8322f26e770e..785712be5b0d 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -766,7 +766,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && - (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { + (rs->rtt_us <= bbr->min_rtt_us || + (filter_expired && !rs->is_ack_delayed))) { bbr->min_rtt_us = rs->rtt_us; bbr->min_rtt_stamp = tcp_jiffies32; } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index abbf0edcf6c2..81148f7a2323 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -24,7 +24,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, { struct tcp_info *info = _info; - if (sk_state_load(sk) == TCP_LISTEN) { + if (inet_sk_state_load(sk) == TCP_LISTEN) { r->idiag_rqueue = sk->sk_ack_backlog; r->idiag_wqueue = sk->sk_max_ack_backlog; } else if (sk->sk_type == SOCK_STREAM) { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 78c192ee03a4..018a48477355 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -379,18 +379,9 @@ fastopen: bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie) { - unsigned long last_syn_loss = 0; const struct dst_entry *dst; - int syn_loss = 0; - tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss); - - /* Recurring FO SYN losses: no cookie or data in SYN */ - if (syn_loss > 1 && - time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { - cookie->len = -1; - return false; - } + tcp_fastopen_cache_get(sk, mss, cookie); /* Firewall blackhole issue check */ if (tcp_fastopen_active_should_disable(sk)) { @@ -448,6 +439,8 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect); * following circumstances: * 1. client side TFO socket receives out of order FIN * 2. client side TFO socket receives out of order RST + * 3. client side TFO socket has timed out three times consecutively during + * or after handshake * We disable active side TFO globally for 1hr at first. Then if it * happens again, we disable it for 2h, then 4h, 8h, ... * And we reset the timeout back to 1hr when we see a successful active @@ -524,3 +517,20 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) dst_release(dst); } } + +void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired) +{ + u32 timeouts = inet_csk(sk)->icsk_retransmits; + struct tcp_sock *tp = tcp_sk(sk); + + /* Broken middle-boxes may black-hole Fast Open connection during or + * even after the handshake. Be extremely conservative and pause + * Fast Open globally after hitting the third consecutive timeout or + * exceeding the configured timeout limit. + */ + if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) && + (timeouts == 2 || (timeouts < 2 && expired))) { + tcp_fastopen_active_disable(sk); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } +} diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 45f750e85714..cfa51cfd2d99 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -97,6 +97,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ +#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -578,8 +579,8 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, void tcp_rcv_space_adjust(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 copied; int time; - int copied; tcp_mstamp_refresh(tp); time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); @@ -602,38 +603,31 @@ void tcp_rcv_space_adjust(struct sock *sk) if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int rcvwin, rcvmem, rcvbuf; + int rcvmem, rcvbuf; + u64 rcvwin, grow; /* minimal window to cope with packet losses, assuming * steady state. Add some cushion because of small variations. */ - rcvwin = (copied << 1) + 16 * tp->advmss; + rcvwin = ((u64)copied << 1) + 16 * tp->advmss; - /* If rate increased by 25%, - * assume slow start, rcvwin = 3 * copied - * If rate increased by 50%, - * assume sender can use 2x growth, rcvwin = 4 * copied - */ - if (copied >= - tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) { - if (copied >= - tp->rcvq_space.space + (tp->rcvq_space.space >> 1)) - rcvwin <<= 1; - else - rcvwin += (rcvwin >> 1); - } + /* Accommodate for sender rate increase (eg. slow start) */ + grow = rcvwin * (copied - tp->rcvq_space.space); + do_div(grow, tp->rcvq_space.space); + rcvwin += (grow << 1); rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); while (tcp_win_from_space(sk, rcvmem) < tp->advmss) rcvmem += 128; - rcvbuf = min(rcvwin / tp->advmss * rcvmem, - sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + do_div(rcvwin, tp->advmss); + rcvbuf = min_t(u64, rcvwin * rcvmem, + sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); if (rcvbuf > sk->sk_rcvbuf) { sk->sk_rcvbuf = rcvbuf; /* Make the window clamp follow along. */ - tp->window_clamp = rcvwin; + tp->window_clamp = tcp_win_from_space(sk, rcvbuf); } } tp->rcvq_space.space = copied; @@ -2864,11 +2858,18 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, *rexmit = REXMIT_LOST; } -static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) +static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) { u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; struct tcp_sock *tp = tcp_sk(sk); + if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { + /* If the remote keeps returning delayed ACKs, eventually + * the min filter would pick it up and overestimate the + * prop. delay when it expires. Skip suspected delayed ACKs. + */ + return; + } minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, rtt_us ? : jiffies_to_usecs(1)); } @@ -2908,7 +2909,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, * always taken together with ACK, SACK, or TS-opts. Any negative * values will be skipped with the seq_rtt_us < 0 check above. */ - tcp_update_rtt_min(sk, ca_rtt_us); + tcp_update_rtt_min(sk, ca_rtt_us, flag); tcp_rtt_estimator(sk, seq_rtt_us); tcp_set_rto(sk); @@ -3132,6 +3133,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) { seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt); + + if (pkts_acked == 1 && last_in_flight < tp->mss_cache && + last_in_flight && !prior_sacked && fully_acked && + sack->rate->prior_delivered + 1 == tp->delivered && + !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) { + /* Conservatively mark a delayed ACK. It's typically + * from a lone runt packet over the round trip to + * a receiver w/o out-of-order or CE events. + */ + flag |= FLAG_ACK_MAYBE_DELAYED; + } } if (sack->first_sackt) { sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt); @@ -3621,6 +3633,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ + rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); @@ -5306,6 +5319,9 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, unsigned int len = skb->len; struct tcp_sock *tp = tcp_sk(sk); + /* TCP congestion window tracking */ + trace_tcp_probe(sk, skb); + tcp_mstamp_refresh(tp); if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 94e28350f420..95738aa0d8a6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1911,7 +1911,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { tcp_clear_md5_list(sk); - kfree_rcu(tp->md5sig_info, rcu); + kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu); tp->md5sig_info = NULL; } #endif @@ -2281,7 +2281,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) timer_expires = jiffies; } - state = sk_state_load(sk); + state = inet_sk_state_load(sk); if (state == TCP_LISTEN) rx_queue = sk->sk_ack_backlog; else @@ -2358,7 +2358,6 @@ out: } static const struct file_operations tcp_afinfo_seq_fops = { - .owner = THIS_MODULE, .open = tcp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7097f92d16e5..03b51cdcc731 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -546,8 +546,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) static DEFINE_SEQLOCK(fastopen_seqlock); void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie, - int *syn_loss, unsigned long *last_syn_loss) + struct tcp_fastopen_cookie *cookie) { struct tcp_metrics_block *tm; @@ -564,8 +563,6 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, *cookie = tfom->cookie; if (cookie->len <= 0 && tfom->try_exp == 1) cookie->exp = true; - *syn_loss = tfom->syn_loss; - *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0; } while (read_seqretry(&fastopen_seqlock, seq)); } rcu_read_unlock(); @@ -895,7 +892,7 @@ static void tcp_metrics_flush_all(struct net *net) pp = &hb->chain; for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { match = net ? net_eq(tm_net(tm), net) : - !atomic_read(&tm_net(tm)->count); + !refcount_read(&tm_net(tm)->count); if (match) { *pp = tm->tcpm_next; kfree_rcu(tm, rcu_head); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b079b619b60c..a8384b0c11f8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -316,9 +316,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) */ local_bh_disable(); inet_twsk_schedule(tw, timeo); - /* Linkage updates. */ - __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); - inet_twsk_put(tw); + /* Linkage updates. + * Note that access to tw after this point is illegal. + */ + inet_twsk_hashdance(tw, sk, &tcp_hashinfo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 0b5a05bd82e3..764298e52577 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -146,7 +146,7 @@ static void tcpnv_init(struct sock *sk) * within a datacenter, where we have reasonable estimates of * RTTs */ - base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT); + base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL); if (base_rtt > 0) { ca->nv_base_rtt = base_rtt; ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */ @@ -364,7 +364,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) */ cwnd_by_slope = (u32) div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt, - (u64)(80000 * tp->mss_cache)); + 80000ULL * tp->mss_cache); max_win = cwnd_by_slope + nv_pad; /* If cwnd > max_win, decrease cwnd diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a4d214c7b506..e9f985e42405 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1944,7 +1944,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, in_flight = tcp_packets_in_flight(tp); - BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight)); + BUG_ON(tcp_skb_pcount(skb) <= 1); + BUG_ON(tp->snd_cwnd <= in_flight); send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; @@ -2414,15 +2415,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; /* Schedule a loss probe in 2*RTT for SACK capable connections - * in Open state, that are either limited by cwnd or application. + * not in loss recovery, that are either limited by cwnd or application. */ if ((early_retrans != 3 && early_retrans != 4) || !tp->packets_out || !tcp_is_sack(tp) || - icsk->icsk_ca_state != TCP_CA_Open) - return false; - - if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && - !tcp_write_queue_empty(sk)) + (icsk->icsk_ca_state != TCP_CA_Open && + icsk->icsk_ca_state != TCP_CA_CWR)) return false; /* Probe timeout is 2*rtt. Add minimum RTO to account @@ -2907,6 +2905,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG)) + tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB, + TCP_SKB_CB(skb)->seq, segs, err); + if (likely(!err)) { TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; trace_tcp_retransmit_skb(sk, skb); @@ -3471,7 +3473,7 @@ int tcp_connect(struct sock *sk) struct sk_buff *buff; int err; - tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB); + tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL); if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c deleted file mode 100644 index 697f4c67b2e3..000000000000 --- a/net/ipv4/tcp_probe.c +++ /dev/null @@ -1,301 +0,0 @@ -/* - * tcpprobe - Observe the TCP flow with kprobes. - * - * The idea for this came from Werner Almesberger's umlsim - * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/kprobes.h> -#include <linux/socket.h> -#include <linux/tcp.h> -#include <linux/slab.h> -#include <linux/proc_fs.h> -#include <linux/module.h> -#include <linux/ktime.h> -#include <linux/time.h> -#include <net/net_namespace.h> - -#include <net/tcp.h> - -MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>"); -MODULE_DESCRIPTION("TCP cwnd snooper"); -MODULE_LICENSE("GPL"); -MODULE_VERSION("1.1"); - -static int port __read_mostly; -MODULE_PARM_DESC(port, "Port to match (0=all)"); -module_param(port, int, 0); - -static unsigned int bufsize __read_mostly = 4096; -MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); -module_param(bufsize, uint, 0); - -static unsigned int fwmark __read_mostly; -MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); -module_param(fwmark, uint, 0); - -static int full __read_mostly; -MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); -module_param(full, int, 0); - -static const char procname[] = "tcpprobe"; - -struct tcp_log { - ktime_t tstamp; - union { - struct sockaddr raw; - struct sockaddr_in v4; - struct sockaddr_in6 v6; - } src, dst; - u16 length; - u32 snd_nxt; - u32 snd_una; - u32 snd_wnd; - u32 rcv_wnd; - u32 snd_cwnd; - u32 ssthresh; - u32 srtt; -}; - -static struct { - spinlock_t lock; - wait_queue_head_t wait; - ktime_t start; - u32 lastcwnd; - - unsigned long head, tail; - struct tcp_log *log; -} tcp_probe; - -static inline int tcp_probe_used(void) -{ - return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1); -} - -static inline int tcp_probe_avail(void) -{ - return bufsize - tcp_probe_used() - 1; -} - -#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \ - do { \ - si4.sin_family = AF_INET; \ - si4.sin_port = inet->inet_##mem##port; \ - si4.sin_addr.s_addr = inet->inet_##mem##addr; \ - } while (0) \ - -/* - * Hook inserted to be called before each receive packet. - * Note: arguments must match tcp_rcv_established()! - */ -static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th) -{ - unsigned int len = skb->len; - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_sock *inet = inet_sk(sk); - - /* Only update if port or skb mark matches */ - if (((port == 0 && fwmark == 0) || - ntohs(inet->inet_dport) == port || - ntohs(inet->inet_sport) == port || - (fwmark > 0 && skb->mark == fwmark)) && - (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { - - spin_lock(&tcp_probe.lock); - /* If log fills, just silently drop */ - if (tcp_probe_avail() > 1) { - struct tcp_log *p = tcp_probe.log + tcp_probe.head; - - p->tstamp = ktime_get(); - switch (sk->sk_family) { - case AF_INET: - tcp_probe_copy_fl_to_si4(inet, p->src.v4, s); - tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d); - break; - case AF_INET6: - memset(&p->src.v6, 0, sizeof(p->src.v6)); - memset(&p->dst.v6, 0, sizeof(p->dst.v6)); -#if IS_ENABLED(CONFIG_IPV6) - p->src.v6.sin6_family = AF_INET6; - p->src.v6.sin6_port = inet->inet_sport; - p->src.v6.sin6_addr = inet6_sk(sk)->saddr; - - p->dst.v6.sin6_family = AF_INET6; - p->dst.v6.sin6_port = inet->inet_dport; - p->dst.v6.sin6_addr = sk->sk_v6_daddr; -#endif - break; - default: - BUG(); - } - - p->length = len; - p->snd_nxt = tp->snd_nxt; - p->snd_una = tp->snd_una; - p->snd_cwnd = tp->snd_cwnd; - p->snd_wnd = tp->snd_wnd; - p->rcv_wnd = tp->rcv_wnd; - p->ssthresh = tcp_current_ssthresh(sk); - p->srtt = tp->srtt_us >> 3; - - tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); - } - tcp_probe.lastcwnd = tp->snd_cwnd; - spin_unlock(&tcp_probe.lock); - - wake_up(&tcp_probe.wait); - } - - jprobe_return(); -} - -static struct jprobe tcp_jprobe = { - .kp = { - .symbol_name = "tcp_rcv_established", - }, - .entry = jtcp_rcv_established, -}; - -static int tcpprobe_open(struct inode *inode, struct file *file) -{ - /* Reset (empty) log */ - spin_lock_bh(&tcp_probe.lock); - tcp_probe.head = tcp_probe.tail = 0; - tcp_probe.start = ktime_get(); - spin_unlock_bh(&tcp_probe.lock); - - return 0; -} - -static int tcpprobe_sprint(char *tbuf, int n) -{ - const struct tcp_log *p - = tcp_probe.log + tcp_probe.tail; - struct timespec64 ts - = ktime_to_timespec64(ktime_sub(p->tstamp, tcp_probe.start)); - - return scnprintf(tbuf, n, - "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n", - (unsigned long)ts.tv_sec, - (unsigned long)ts.tv_nsec, - &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una, - p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); -} - -static ssize_t tcpprobe_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - int error = 0; - size_t cnt = 0; - - if (!buf) - return -EINVAL; - - while (cnt < len) { - char tbuf[256]; - int width; - - /* Wait for data in buffer */ - error = wait_event_interruptible(tcp_probe.wait, - tcp_probe_used() > 0); - if (error) - break; - - spin_lock_bh(&tcp_probe.lock); - if (tcp_probe.head == tcp_probe.tail) { - /* multiple readers race? */ - spin_unlock_bh(&tcp_probe.lock); - continue; - } - - width = tcpprobe_sprint(tbuf, sizeof(tbuf)); - - if (cnt + width < len) - tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1); - - spin_unlock_bh(&tcp_probe.lock); - - /* if record greater than space available - return partial buffer (so far) */ - if (cnt + width >= len) - break; - - if (copy_to_user(buf + cnt, tbuf, width)) - return -EFAULT; - cnt += width; - } - - return cnt == 0 ? error : cnt; -} - -static const struct file_operations tcpprobe_fops = { - .owner = THIS_MODULE, - .open = tcpprobe_open, - .read = tcpprobe_read, - .llseek = noop_llseek, -}; - -static __init int tcpprobe_init(void) -{ - int ret = -ENOMEM; - - /* Warning: if the function signature of tcp_rcv_established, - * has been changed, you also have to change the signature of - * jtcp_rcv_established, otherwise you end up right here! - */ - BUILD_BUG_ON(__same_type(tcp_rcv_established, - jtcp_rcv_established) == 0); - - init_waitqueue_head(&tcp_probe.wait); - spin_lock_init(&tcp_probe.lock); - - if (bufsize == 0) - return -EINVAL; - - bufsize = roundup_pow_of_two(bufsize); - tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); - if (!tcp_probe.log) - goto err0; - - if (!proc_create(procname, S_IRUSR, init_net.proc_net, &tcpprobe_fops)) - goto err0; - - ret = register_jprobe(&tcp_jprobe); - if (ret) - goto err1; - - pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n", - port, fwmark, bufsize); - return 0; - err1: - remove_proc_entry(procname, init_net.proc_net); - err0: - kfree(tcp_probe.log); - return ret; -} -module_init(tcpprobe_init); - -static __exit void tcpprobe_exit(void) -{ - remove_proc_entry(procname, init_net.proc_net); - unregister_jprobe(&tcp_jprobe); - kfree(tcp_probe.log); -} -module_exit(tcpprobe_exit); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 388158c9d9f6..71fc60f1b326 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -198,11 +198,6 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) { dst_negative_advice(sk); - if (tp->syn_fastopen || tp->syn_data) - tcp_fastopen_cache_set(sk, 0, NULL, true, 0); - if (tp->syn_data && icsk->icsk_retransmits == 1) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPFASTOPENACTIVEFAIL); } else if (!tp->syn_data && !tp->syn_fastopen) { sk_rethink_txhash(sk); } @@ -210,17 +205,6 @@ static int tcp_write_timeout(struct sock *sk) expired = icsk->icsk_retransmits >= retry_until; } else { if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { - /* Some middle-boxes may black-hole Fast Open _after_ - * the handshake. Therefore we conservatively disable - * Fast Open on this path on recurring timeouts after - * successful Fast Open. - */ - if (tp->syn_data_acked) { - tcp_fastopen_cache_set(sk, 0, NULL, true, 0); - if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPFASTOPENACTIVEFAIL); - } /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -243,11 +227,19 @@ static int tcp_write_timeout(struct sock *sk) expired = retransmits_timed_out(sk, retry_until, icsk->icsk_user_timeout); } + tcp_fastopen_active_detect_blackhole(sk, expired); + + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG)) + tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB, + icsk->icsk_retransmits, + icsk->icsk_rto, (int)expired); + if (expired) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; } + return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ef45adfc0edb..f81f969f9c06 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -357,18 +357,12 @@ fail: } EXPORT_SYMBOL(udp_lib_get_port); -static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, - unsigned int port) -{ - return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; -} - int udp_v4_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = - udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); + ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); unsigned int hash2_partial = - udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); + ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -445,7 +439,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, struct sk_buff *skb) { struct sock *sk, *result; - int score, badness, matches = 0, reuseport = 0; + int score, badness; u32 hash = 0; result = NULL; @@ -454,23 +448,16 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif, exact_dif); if (score > badness) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); if (result) return result; - matches = 1; } badness = score; result = sk; - } else if (score == badness && reuseport) { - matches++; - if (reciprocal_scale(hash, matches) == 0) - result = sk; - hash = next_pseudo_random32(hash); } } return result; @@ -488,11 +475,11 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; bool exact_dif = udp_lib_exact_dif_match(net, skb); - int score, badness, matches = 0, reuseport = 0; + int score, badness; u32 hash = 0; if (hslot->count > 10) { - hash2 = udp4_portaddr_hash(net, daddr, hnum); + hash2 = ipv4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) @@ -503,7 +490,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; - hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; /* avoid searching the same slot again. */ if (unlikely(slot2 == old_slot2)) @@ -526,23 +513,16 @@ begin: score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif, exact_dif); if (score > badness) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); if (result) return result; - matches = 1; } result = sk; badness = score; - } else if (score == badness && reuseport) { - matches++; - if (reciprocal_scale(hash, matches) == 0) - result = sk; - hash = next_pseudo_random32(hash); } } return result; @@ -997,8 +977,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!saddr) saddr = inet->mc_addr; connected = 0; - } else if (!ipc.oif) + } else if (!ipc.oif) { ipc.oif = inet->uc_index; + } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { + /* oif is set, packet is to local broadcast and + * and uc_index is set. oif is most likely set + * by sk_bound_dev_if. If uc_index != oif check if the + * oif is an L3 master and uc_index is an L3 slave. + * If so, we want to allow the send using the uc_index. + */ + if (ipc.oif != inet->uc_index && + ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), + inet->uc_index)) { + ipc.oif = inet->uc_index; + } + } if (connected) rt = (struct rtable *)sk_dst_check(sk, 0); @@ -1775,7 +1768,7 @@ EXPORT_SYMBOL(udp_lib_rehash); static void udp_v4_rehash(struct sock *sk) { - u16 new_hash = udp4_portaddr_hash(sock_net(sk), + u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); @@ -1966,9 +1959,9 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct sk_buff *nskb; if (use_hash2) { - hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & + hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & udptable->mask; - hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask; + hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); @@ -2200,7 +2193,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, int dif, int sdif) { unsigned short hnum = ntohs(loc_port); - unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); + unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); @@ -2510,8 +2503,6 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) mask |= POLLIN | POLLRDNORM; - sock_rps_record_flow(sk); - /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) @@ -2736,7 +2727,6 @@ int udp4_seq_show(struct seq_file *seq, void *v) } static const struct file_operations udp_afinfo_seq_fops = { - .owner = THIS_MODULE, .open = udp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 59f10fe9782e..f96614e9b9a5 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -75,7 +75,6 @@ static struct inet_protosw udplite4_protosw = { #ifdef CONFIG_PROC_FS static const struct file_operations udplite_afinfo_seq_fops = { - .owner = THIS_MODULE, .open = udp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 20ca486b3cad..63faeee989a9 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -62,7 +62,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - top_iph->ttl = ip4_dst_hoplimit(dst->child); + top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; @@ -106,18 +106,15 @@ static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x, { __skb_push(skb, skb->mac_len); return skb_mac_gso_segment(skb, features); - } static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) { struct xfrm_offload *xo = xfrm_offload(skb); - if (xo->flags & XFRM_GSO_SEGMENT) { - skb->network_header = skb->network_header - x->props.header_len; + if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + sizeof(struct iphdr); - } skb_reset_mac_len(skb); pskb_pull(skb, skb->mac_len + x->props.header_len); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f49bd7897e95..e1846b97ee69 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -186,7 +186,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_work(struct work_struct *w); -static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, + bool send_na); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(struct timer_list *t); static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); @@ -3438,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } else if (event == NETDEV_CHANGE) { if (!addrconf_link_ready(dev)) { /* device is still not ready. */ + rt6_sync_down_dev(dev, event); break; } @@ -3449,6 +3451,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, * multicast snooping switches */ ipv6_mc_up(idev); + rt6_sync_up(dev, RTNH_F_LINKDOWN); break; } idev->if_flags |= IF_READY; @@ -3484,6 +3487,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (run_pending) addrconf_dad_run(idev); + /* Device has an address by now */ + rt6_sync_up(dev, RTNH_F_DEAD); + /* * If the MTU changed during the interface down, * when the interface up, the changed MTU must be @@ -3577,6 +3583,7 @@ static bool addr_is_local(const struct in6_addr *addr) static int addrconf_ifdown(struct net_device *dev, int how) { + unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN; struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa, *tmp; @@ -3586,8 +3593,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - rt6_ifdown(net, dev); - neigh_ifdown(&nd_tbl, dev); + rt6_disable_ip(dev, event); idev = __in6_dev_get(dev); if (!idev) @@ -3833,12 +3839,17 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) idev->cnf.accept_dad < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { + bool send_na = false; + + if (ifp->flags & IFA_F_TENTATIVE && + !(ifp->flags & IFA_F_OPTIMISTIC)) + send_na = true; bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp, bump_id); + addrconf_dad_completed(ifp, bump_id, send_na); return; } @@ -3967,16 +3978,21 @@ static void addrconf_dad_work(struct work_struct *w) } if (ifp->dad_probes == 0) { + bool send_na = false; + /* * DAD was successful */ + if (ifp->flags & IFA_F_TENTATIVE && + !(ifp->flags & IFA_F_OPTIMISTIC)) + send_na = true; bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); write_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp, bump_id); + addrconf_dad_completed(ifp, bump_id, send_na); goto out; } @@ -4014,7 +4030,8 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp) return true; } -static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, + bool send_na) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; @@ -4046,6 +4063,16 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) if (send_mld) ipv6_mc_dad_complete(ifp->idev); + /* send unsolicited NA if enabled */ + if (send_na && + (ifp->idev->cnf.ndisc_notify || + dev_net(dev)->ipv6.devconf_all->ndisc_notify)) { + ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr, + /*router=*/ !!ifp->idev->cnf.forwarding, + /*solicited=*/ false, /*override=*/ true, + /*inc_opt=*/ true); + } + if (send_rs) { /* * If a host as already performed a random delay @@ -4209,7 +4236,6 @@ static int if6_seq_open(struct inode *inode, struct file *file) } static const struct file_operations if6_fops = { - .owner = THIS_MODULE, .open = if6_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -4352,9 +4378,11 @@ restart: spin_lock(&ifpub->lock); ifpub->regen_count = 0; spin_unlock(&ifpub->lock); + rcu_read_unlock_bh(); ipv6_create_tempaddr(ifpub, ifp, true); in6_ifa_put(ifpub); in6_ifa_put(ifp); + rcu_read_lock_bh(); goto restart; } } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) @@ -6595,27 +6623,45 @@ int __init addrconf_init(void) rtnl_af_register(&inet6_ops); - err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, - 0); + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK, + NULL, inet6_dump_ifinfo, 0); if (err < 0) goto errout; - /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0); - __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0); - __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, - inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED); - __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, - inet6_dump_ifmcaddr, 0); - __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, - inet6_dump_ifacaddr, 0); - __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, - inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED); - - ipv6_addr_label_rtnl_register(); + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR, + inet6_rtm_newaddr, NULL, 0); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR, + inet6_rtm_deladdr, NULL, 0); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR, + inet6_rtm_getaddr, inet6_dump_ifaddr, + RTNL_FLAG_DOIT_UNLOCKED); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST, + NULL, inet6_dump_ifmcaddr, 0); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST, + NULL, inet6_dump_ifacaddr, 0); + if (err < 0) + goto errout; + err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, + inet6_netconf_get_devconf, + inet6_netconf_dump_devconf, + RTNL_FLAG_DOIT_UNLOCKED); + if (err < 0) + goto errout; + err = ipv6_addr_label_rtnl_register(); + if (err < 0) + goto errout; return 0; errout: + rtnl_unregister_all(PF_INET6); rtnl_af_unregister(&inet6_ops); unregister_netdevice_notifier(&ipv6_dev_notf); errlo: diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 00e1f8ee08f8..1d6ced37ad71 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -547,13 +547,22 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, return err; } -void __init ipv6_addr_label_rtnl_register(void) +int __init ipv6_addr_label_rtnl_register(void) { - __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, - NULL, RTNL_FLAG_DOIT_UNLOCKED); - __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, - NULL, RTNL_FLAG_DOIT_UNLOCKED); - __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, - ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED); -} + int ret; + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL, + ip6addrlbl_newdel, + NULL, RTNL_FLAG_DOIT_UNLOCKED); + if (ret < 0) + return ret; + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL, + ip6addrlbl_newdel, + NULL, RTNL_FLAG_DOIT_UNLOCKED); + if (ret < 0) + return ret; + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL, + ip6addrlbl_get, + ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED); + return ret; +} diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c9441ca45399..416917719a6f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -284,6 +284,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; + bool saved_ipv6only; int addr_type = 0; int err = 0; @@ -389,19 +390,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; + saved_ipv6only = sk->sk_ipv6only; + if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) + sk->sk_ipv6only = 1; + /* Make sure we are allowed to bind here. */ if ((snum || !inet->bind_address_no_port) && sk->sk_prot->get_port(sk, snum)) { + sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); err = -EADDRINUSE; goto out; } - if (addr_type != IPV6_ADDR_ANY) { + if (addr_type != IPV6_ADDR_ANY) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; - if (addr_type != IPV6_ADDR_MAPPED) - sk->sk_ipv6only = 1; - } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 0bbab8a4b5d8..8e085cc05aeb 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -533,7 +533,6 @@ static int ac6_seq_open(struct inode *inode, struct file *file) } static const struct file_operations ac6_seq_fops = { - .owner = THIS_MODULE, .open = ac6_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a1f918713006..fbf08ce3f5ab 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -221,8 +221,7 @@ ipv4_connected: if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != usin->sin6_scope_id) { + if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) { err = -EINVAL; goto out; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1a7f00cd4803..97513f35bcc5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -141,14 +141,32 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; + struct xfrm_offload *xo = xfrm_offload(skb); void *tmp; - struct dst_entry *dst = skb_dst(skb); - struct xfrm_state *x = dst->xfrm; + struct xfrm_state *x; + + if (xo && (xo->flags & XFRM_DEV_RESUME)) + x = skb->sp->xvec[skb->sp->len - 1]; + else + x = skb_dst(skb)->xfrm; tmp = ESP_SKB_CB(skb)->tmp; esp_ssg_unref(x, tmp); kfree(tmp); - xfrm_output_resume(skb, err); + + if (xo && (xo->flags & XFRM_DEV_RESUME)) { + if (err) { + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); + kfree_skb(skb); + return; + } + + skb_push(skb, skb->data - skb_mac_header(skb)); + secpath_reset(skb); + xfrm_dev_resume(skb); + } else { + xfrm_output_resume(skb, err); + } } /* Move ESP header back into place. */ @@ -734,17 +752,13 @@ static int esp_init_aead(struct xfrm_state *x) char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; - u32 mask = 0; err = -ENAMETOOLONG; if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) goto error; - if (x->xso.offload_handle) - mask |= CRYPTO_ALG_ASYNC; - - aead = crypto_alloc_aead(aead_name, 0, mask); + aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@ -774,7 +788,6 @@ static int esp_init_authenc(struct xfrm_state *x) char authenc_name[CRYPTO_MAX_ALG_NAME]; unsigned int keylen; int err; - u32 mask = 0; err = -EINVAL; if (!x->ealg) @@ -800,10 +813,7 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; } - if (x->xso.offload_handle) - mask |= CRYPTO_ALG_ASYNC; - - aead = crypto_alloc_aead(authenc_name, 0, mask); + aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index f52c314d4c97..3fd1ec775dc2 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -136,78 +136,39 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb) static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { - __u32 seq; - int err = 0; - struct sk_buff *skb2; struct xfrm_state *x; struct ip_esp_hdr *esph; struct crypto_aead *aead; - struct sk_buff *segs = ERR_PTR(-EINVAL); netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); if (!xo) - goto out; + return ERR_PTR(-EINVAL); if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) - goto out; - - seq = xo->seq.low; + return ERR_PTR(-EINVAL); x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); if (esph->spi != x->id.spi) - goto out; + return ERR_PTR(-EINVAL); if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) - goto out; + return ERR_PTR(-EINVAL); __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); skb->encap_hdr_csum = 1; - if (!(features & NETIF_F_HW_ESP)) + if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || + (x->xso.dev != skb->dev)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); - segs = x->outer_mode->gso_segment(x, skb, esp_features); - if (IS_ERR_OR_NULL(segs)) - goto out; - - __skb_pull(skb, skb->data - skb_mac_header(skb)); - - skb2 = segs; - do { - struct sk_buff *nskb = skb2->next; - - xo = xfrm_offload(skb2); - xo->flags |= XFRM_GSO_SEGMENT; - xo->seq.low = seq; - xo->seq.hi = xfrm_replay_seqhi(x, seq); - - if(!(features & NETIF_F_HW_ESP)) - xo->flags |= CRYPTO_FALLBACK; - - x->outer_mode->xmit(x, skb2); - - err = x->type_offload->xmit(x, skb2, esp_features); - if (err) { - kfree_skb_list(segs); - return ERR_PTR(err); - } + xo->flags |= XFRM_GSO_SEGMENT; - if (!skb_is_gso(skb2)) - seq++; - else - seq += skb_shinfo(skb2)->gso_segs; - - skb_push(skb2, skb2->mac_len); - skb2 = nskb; - } while (skb2); - -out: - return segs; + return x->outer_mode->gso_segment(x, skb, esp_features); } static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) @@ -226,6 +187,7 @@ static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { + int len; int err; int alen; int blksize; @@ -234,6 +196,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; + __u32 seq; esp.inplace = true; @@ -269,28 +232,33 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features return esp.nfrags; } + seq = xo->seq.low; + esph = ip_esp_hdr(skb); esph->spi = x->id.spi; skb_push(skb, -skb_network_offset(skb)); if (xo->flags & XFRM_GSO_SEGMENT) { - esph->seq_no = htonl(xo->seq.low); - } else { - int len; - - len = skb->len - sizeof(struct ipv6hdr); - if (len > IPV6_MAXPLEN) - len = 0; + esph->seq_no = htonl(seq); - ipv6_hdr(skb)->payload_len = htons(len); + if (!skb_is_gso(skb)) + xo->seq.low++; + else + xo->seq.low += skb_shinfo(skb)->gso_segs; } + esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + + len = skb->len - sizeof(struct ipv6hdr); + if (len > IPV6_MAXPLEN) + len = 0; + + ipv6_hdr(skb)->payload_len = htons(len); + if (hw_offload) return 0; - esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); - err = esp6_output_tail(x, skb, &esp); if (err) return err; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 6eb5e68f112a..44c39c5f0638 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -512,9 +512,7 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) struct ila_map *ila; int ret; - ret = rhashtable_walk_start(rhiter); - if (ret && ret != -EAGAIN) - goto done; + rhashtable_walk_start(rhiter); for (;;) { ila = rhashtable_walk_next(rhiter); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b01858f5deb1..2febe26de6a1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -125,6 +125,40 @@ static inline int compute_score(struct sock *sk, struct net *net, } /* called with rcu_read_lock() */ +static struct sock *inet6_lhash2_lookup(struct net *net, + struct inet_listen_hashbucket *ilb2, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + const __be16 sport, const struct in6_addr *daddr, + const unsigned short hnum, const int dif, const int sdif) +{ + bool exact_dif = inet6_exact_dif_match(net, skb); + struct inet_connection_sock *icsk; + struct sock *sk, *result = NULL; + int score, hiscore = 0; + u32 phash = 0; + + inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { + sk = (struct sock *)icsk; + score = compute_score(sk, net, hnum, daddr, dif, sdif, + exact_dif); + if (score > hiscore) { + if (sk->sk_reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; + } + result = sk; + hiscore = score; + } + } + + return result; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -134,31 +168,56 @@ struct sock *inet6_lookup_listener(struct net *net, { unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore = 0, matches = 0, reuseport = 0; bool exact_dif = inet6_exact_dif_match(net, skb); + struct inet_listen_hashbucket *ilb2; struct sock *sk, *result = NULL; + int score, hiscore = 0; + unsigned int hash2; u32 phash = 0; + if (ilb->count <= 10 || !hashinfo->lhash2) + goto port_lookup; + + /* Too many sk in the ilb bucket (which is hashed by port alone). + * Try lhash2 (which is hashed by port and addr) instead. + */ + + hash2 = ipv6_portaddr_hash(net, daddr, hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + result = inet6_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + if (result) + return result; + + /* Lookup lhash2 with in6addr_any */ + + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + if (ilb2->count > ilb->count) + goto port_lookup; + + return inet6_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + +port_lookup: sk_for_each(sk, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, phash, skb, doff); if (result) return result; - matches = 1; } result = sk; hiscore = score; - } else if (score == hiscore && reuseport) { - matches++; - if (reciprocal_scale(phash, matches) == 0) - result = sk; - phash = next_pseudo_random32(phash); } } return result; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 217683d40f12..92b8d8c75eed 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -107,16 +107,13 @@ enum { void fib6_update_sernum(struct rt6_info *rt) { - struct fib6_table *table = rt->rt6i_table; struct net *net = dev_net(rt->dst.dev); struct fib6_node *fn; - spin_lock_bh(&table->tb6_lock); fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&table->tb6_lock)); + lockdep_is_held(&rt->rt6i_table->tb6_lock)); if (fn) fn->fn_sernum = fib6_new_sernum(net); - spin_unlock_bh(&table->tb6_lock); } /* @@ -804,12 +801,6 @@ insert_above: return ln; } -static bool rt6_qualify_for_ecmp(struct rt6_info *rt) -{ - return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == - RTF_GATEWAY; -} - static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) { int i; @@ -898,7 +889,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &fn->leaf; for (iter = leaf; iter; - iter = rcu_dereference_protected(iter->dst.rt6_next, + iter = rcu_dereference_protected(iter->rt6_next, lockdep_is_held(&rt->rt6i_table->tb6_lock))) { /* * Search for duplicates @@ -955,7 +946,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, break; next_iter: - ins = &iter->dst.rt6_next; + ins = &iter->rt6_next; } if (fallback_ins && !found) { @@ -984,7 +975,7 @@ next_iter: &sibling->rt6i_siblings); break; } - sibling = rcu_dereference_protected(sibling->dst.rt6_next, + sibling = rcu_dereference_protected(sibling->rt6_next, lockdep_is_held(&rt->rt6i_table->tb6_lock)); } /* For each sibling in the list, increment the counter of @@ -999,6 +990,7 @@ next_iter: rt6i_nsiblings++; } BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); + rt6_multipath_rebalance(temp_sibling); } /* @@ -1014,7 +1006,7 @@ add: if (err) return err; - rcu_assign_pointer(rt->dst.rt6_next, iter); + rcu_assign_pointer(rt->rt6_next, iter); atomic_inc(&rt->rt6i_ref); rcu_assign_pointer(rt->rt6i_node, fn); rcu_assign_pointer(*ins, rt); @@ -1045,7 +1037,7 @@ add: atomic_inc(&rt->rt6i_ref); rcu_assign_pointer(rt->rt6i_node, fn); - rt->dst.rt6_next = iter->dst.rt6_next; + rt->rt6_next = iter->rt6_next; rcu_assign_pointer(*ins, rt); call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, rt, extack); @@ -1064,14 +1056,14 @@ add: if (nsiblings) { /* Replacing an ECMP route, remove all siblings */ - ins = &rt->dst.rt6_next; + ins = &rt->rt6_next; iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->rt6i_table->tb6_lock)); while (iter) { if (iter->rt6i_metric > rt->rt6i_metric) break; if (rt6_qualify_for_ecmp(iter)) { - *ins = iter->dst.rt6_next; + *ins = iter->rt6_next; iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) @@ -1080,7 +1072,7 @@ add: nsiblings--; info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } else { - ins = &iter->dst.rt6_next; + ins = &iter->rt6_next; } iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->rt6i_table->tb6_lock)); @@ -1107,8 +1099,8 @@ void fib6_force_start_gc(struct net *net) jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } -static void fib6_update_sernum_upto_root(struct rt6_info *rt, - int sernum) +static void __fib6_update_sernum_upto_root(struct rt6_info *rt, + int sernum) { struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, lockdep_is_held(&rt->rt6i_table->tb6_lock)); @@ -1122,6 +1114,11 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt, } } +void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) +{ + __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); +} + /* * Add routing information to the routing tree. * <destination addr>/<source addr> @@ -1241,7 +1238,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, err = fib6_add_rt2node(fn, rt, info, mxc, extack); if (!err) { - fib6_update_sernum_upto_root(rt, sernum); + __fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); } @@ -1670,7 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); /* Unlink it */ - *rtp = rt->dst.rt6_next; + *rtp = rt->rt6_next; rt->rt6i_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; @@ -1691,6 +1688,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, sibling->rt6i_nsiblings--; rt->rt6i_nsiblings = 0; list_del_init(&rt->rt6i_siblings); + rt6_multipath_rebalance(next_sibling); } /* Adjust walkers */ @@ -1698,7 +1696,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, FOR_WALKERS(net, w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); - w->leaf = rcu_dereference_protected(rt->dst.rt6_next, + w->leaf = rcu_dereference_protected(rt->rt6_next, lockdep_is_held(&table->tb6_lock)); if (!w->leaf) w->state = FWS_U; @@ -1762,7 +1760,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) fib6_del_route(table, fn, rtp, info); return 0; } - rtp_next = &cur->dst.rt6_next; + rtp_next = &cur->rt6_next; } return -ENOENT; } @@ -1918,7 +1916,7 @@ static int fib6_clean_node(struct fib6_walker *w) for_each_fib6_walker_rt(w) { res = c->func(rt, c->arg); - if (res < 0) { + if (res == -1) { w->leaf = rt; res = fib6_del(rt, &info); if (res) { @@ -1931,6 +1929,12 @@ static int fib6_clean_node(struct fib6_walker *w) continue; } return 0; + } else if (res == -2) { + if (WARN_ON(!rt->rt6i_nsiblings)) + continue; + rt = list_last_entry(&rt->rt6i_siblings, + struct rt6_info, rt6i_siblings); + continue; } WARN_ON(res != 0); } @@ -1942,7 +1946,8 @@ static int fib6_clean_node(struct fib6_walker *w) * Convenient frontend to tree walker. * * func is called on each route. - * It may return -1 -> delete this route. + * It may return -2 -> skip multipath route. + * -1 -> delete this route. * 0 -> continue walking */ @@ -2134,7 +2139,6 @@ static void fib6_net_exit(struct net *net) { unsigned int i; - rt6_ifdown(net, NULL); del_timer_sync(&net->ipv6.ip6_fib_timer); for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { @@ -2173,8 +2177,8 @@ int __init fib6_init(void) if (ret) goto out_kmem_cache_create; - ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - 0); + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, + inet6_dump_fib, 0); if (ret) goto out_unregister_subsys; @@ -2239,7 +2243,7 @@ static int ipv6_route_yield(struct fib6_walker *w) do { iter->w.leaf = rcu_dereference_protected( - iter->w.leaf->dst.rt6_next, + iter->w.leaf->rt6_next, lockdep_is_held(&iter->tbl->tb6_lock)); iter->skip--; if (!iter->skip && iter->w.leaf) @@ -2305,7 +2309,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (!v) goto iter_table; - n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next); + n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next); if (n) { ++*pos; return n; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7f59c8fabeeb..3dab664ff503 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -836,7 +836,6 @@ static int ip6fl_seq_release(struct inode *inode, struct file *file) } static const struct file_operations ip6fl_seq_fops = { - .owner = THIS_MODULE, .open = ip6fl_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 873549228ccb..05f070e123e4 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -55,6 +55,8 @@ #include <net/ip6_route.h> #include <net/ip6_tunnel.h> #include <net/gre.h> +#include <net/erspan.h> +#include <net/dst_metadata.h> static bool log_ecn_error = true; @@ -68,11 +70,13 @@ static unsigned int ip6gre_net_id __read_mostly; struct ip6gre_net { struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; + struct ip6_tnl __rcu *collect_md_tun; struct net_device *fb_tunnel_dev; }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly; static struct rtnl_link_ops ip6gre_tap_ops __read_mostly; +static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly; static int ip6gre_tunnel_init(struct net_device *dev); static void ip6gre_tunnel_setup(struct net_device *dev); static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); @@ -121,7 +125,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, unsigned int h1 = HASH_KEY(key); struct ip6_tnl *t, *cand = NULL; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); - int dev_type = (gre_proto == htons(ETH_P_TEB)) ? + int dev_type = (gre_proto == htons(ETH_P_TEB) || + gre_proto == htons(ETH_P_ERSPAN)) ? ARPHRD_ETHER : ARPHRD_IP6GRE; int score, cand_score = 4; @@ -226,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, if (cand) return cand; + t = rcu_dereference(ign->collect_md_tun); + if (t && t->dev->flags & IFF_UP) + return t; + dev = ign->fb_tunnel_dev; if (dev->flags & IFF_UP) return netdev_priv(dev); @@ -261,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, t); + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } @@ -270,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, NULL); + for (tp = ip6gre_bucket(ign, t); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { @@ -461,7 +476,101 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) &ipv6h->saddr, &ipv6h->daddr, tpi->key, tpi->proto); if (tunnel) { - ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); + if (tunnel->parms.collect_md) { + struct metadata_dst *tun_dst; + __be64 tun_id; + __be16 flags; + + flags = tpi->flags; + tun_id = key32_to_tunnel_id(tpi->key); + + tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0); + if (!tun_dst) + return PACKET_REJECT; + + ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); + } else { + ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); + } + + return PACKET_RCVD; + } + + return PACKET_REJECT; +} + +static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, + struct tnl_ptk_info *tpi) +{ + struct erspan_base_hdr *ershdr; + struct erspan_metadata *pkt_md; + const struct ipv6hdr *ipv6h; + struct ip6_tnl *tunnel; + u8 ver; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) + return PACKET_REJECT; + + ipv6h = ipv6_hdr(skb); + ershdr = (struct erspan_base_hdr *)skb->data; + ver = ershdr->ver; + tpi->key = cpu_to_be32(get_session_id(ershdr)); + + tunnel = ip6gre_tunnel_lookup(skb->dev, + &ipv6h->saddr, &ipv6h->daddr, tpi->key, + tpi->proto); + if (tunnel) { + int len = erspan_hdr_len(ver); + + if (unlikely(!pskb_may_pull(skb, len))) + return PACKET_REJECT; + + ershdr = (struct erspan_base_hdr *)skb->data; + pkt_md = (struct erspan_metadata *)(ershdr + 1); + + if (__iptunnel_pull_header(skb, len, + htons(ETH_P_TEB), + false, false) < 0) + return PACKET_REJECT; + + if (tunnel->parms.collect_md) { + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + struct erspan_metadata *md; + __be64 tun_id; + __be16 flags; + + tpi->flags |= TUNNEL_KEY; + flags = tpi->flags; + tun_id = key32_to_tunnel_id(tpi->key); + + tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, + sizeof(*md)); + if (!tun_dst) + return PACKET_REJECT; + + info = &tun_dst->u.tun_info; + md = ip_tunnel_info_opts(info); + + memcpy(md, pkt_md, sizeof(*md)); + md->version = ver; + info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + info->options_len = sizeof(*md); + + ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); + + } else { + tunnel->parms.erspan_ver = ver; + + if (ver == 1) { + tunnel->parms.index = ntohl(pkt_md->u.index); + } else { + tunnel->parms.dir = pkt_md->u.md2.dir; + tunnel->parms.hwid = get_hwid(&pkt_md->u.md2); + } + + ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); + } return PACKET_RCVD; } @@ -482,9 +591,17 @@ static int gre_rcv(struct sk_buff *skb) if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false)) goto drop; + if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || + tpi.proto == htons(ETH_P_ERSPAN2))) { + if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) + return 0; + goto out; + } + if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD) return 0; +out: icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); @@ -497,6 +614,78 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum) csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } +static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb, + struct net_device *dev, + struct flowi6 *fl6, __u8 *dsfield, + int *encap_limit) +{ + const struct iphdr *iph = ip_hdr(skb); + struct ip6_tnl *t = netdev_priv(dev); + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + *encap_limit = t->parms.encap_limit; + + memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6)); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + *dsfield = ipv4_get_dsfield(iph); + else + *dsfield = ip6_tclass(t->parms.flowinfo); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6->flowi6_mark = skb->mark; + else + fl6->flowi6_mark = t->parms.fwmark; + + fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); +} + +static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb, + struct net_device *dev, + struct flowi6 *fl6, __u8 *dsfield, + int *encap_limit) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ip6_tnl *t = netdev_priv(dev); + __u16 offset; + + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + + tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + *encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { + *encap_limit = t->parms.encap_limit; + } + + memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6)); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + *dsfield = ipv6_get_dsfield(ipv6h); + else + *dsfield = ip6_tclass(t->parms.flowinfo); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6->flowlabel |= ip6_flowlabel(ipv6h); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6->flowi6_mark = skb->mark; + else + fl6->flowi6_mark = t->parms.fwmark; + + fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + return 0; +} + static netdev_tx_t __gre6_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct flowi6 *fl6, int encap_limit, @@ -518,8 +707,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; - gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, - protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); + + if (tunnel->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + __be16 flags; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || + !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -EINVAL; + + key = &tun_info->key; + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = IPPROTO_GRE; + fl6->daddr = key->u.ipv6.dst; + fl6->flowlabel = key->label; + fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + dsfield = key->tos; + flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); + tunnel->tun_hlen = gre_calc_hlen(flags); + + gre_build_header(skb, tunnel->tun_hlen, + flags, protocol, + tunnel_id_to_key32(tun_info->key.tun_id), 0); + + } else { + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + protocol, tunnel->parms.o_key, + htonl(tunnel->o_seqno)); + } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); @@ -528,30 +747,17 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - const struct iphdr *iph = ip_hdr(skb); int encap_limit = -1; struct flowi6 fl6; - __u8 dsfield; + __u8 dsfield = 0; __u32 mtu; int err; memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv4_get_dsfield(iph); - else - dsfield = ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; - else - fl6.flowi6_mark = t->parms.fwmark; - - fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (!t->parms.collect_md) + prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, + &dsfield, &encap_limit); err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) @@ -575,46 +781,17 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct ipv6hdr *ipv6h = ipv6_hdr(skb); int encap_limit = -1; - __u16 offset; struct flowi6 fl6; - __u8 dsfield; + __u8 dsfield = 0; __u32 mtu; int err; if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) return -1; - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ - ipv6h = ipv6_hdr(skb); - - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); - return -1; - } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv6_get_dsfield(ipv6h); - else - dsfield = ip6_tclass(t->parms.flowinfo); - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; - else - fl6.flowi6_mark = t->parms.fwmark; - - fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (!t->parms.collect_md && + prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) + return -1; if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) return -1; @@ -661,7 +838,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + if (!t->parms.collect_md) + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) @@ -706,6 +884,137 @@ tx_err: return NETDEV_TX_OK; } +static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ip6_tnl *t = netdev_priv(dev); + struct dst_entry *dst = skb_dst(skb); + struct net_device_stats *stats; + bool truncate = false; + int encap_limit = -1; + __u8 dsfield = false; + struct flowi6 fl6; + int err = -EINVAL; + __u32 mtu; + + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) + goto tx_err; + + if (gre_handle_offloads(skb, false)) + goto tx_err; + + if (skb->len > dev->mtu + dev->hard_header_len) { + pskb_trim(skb, dev->mtu + dev->hard_header_len); + truncate = true; + } + + t->parms.o_flags &= ~TUNNEL_KEY; + IPCB(skb)->flags = 0; + + /* For collect_md mode, derive fl6 from the tunnel key, + * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}. + */ + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + struct erspan_metadata *md; + __be32 tun_id; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || + !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -EINVAL; + + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_GRE; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + + dsfield = key->tos; + md = ip_tunnel_info_opts(tun_info); + if (!md) + goto tx_err; + + tun_id = tunnel_id_to_key32(key->tun_id); + if (md->version == 1) { + erspan_build_header(skb, + ntohl(tun_id), + ntohl(md->u.index), truncate, + false); + } else if (md->version == 2) { + erspan_build_header_v2(skb, + ntohl(tun_id), + md->u.md2.dir, + get_hwid(&md->u.md2), + truncate, false); + } + } else { + switch (skb->protocol) { + case htons(ETH_P_IP): + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, + &dsfield, &encap_limit); + break; + case htons(ETH_P_IPV6): + if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + goto tx_err; + if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, + &dsfield, &encap_limit)) + goto tx_err; + break; + default: + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + break; + } + + if (t->parms.erspan_ver == 1) + erspan_build_header(skb, ntohl(t->parms.o_key), + t->parms.index, + truncate, false); + else + erspan_build_header_v2(skb, ntohl(t->parms.o_key), + t->parms.dir, + t->parms.hwid, + truncate, false); + fl6.daddr = t->parms.raddr; + } + + /* Push GRE header. */ + gre_build_header(skb, 8, TUNNEL_SEQ, + htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++)); + + /* TooBig packet may have updated dst->dev's mtu */ + if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + NEXTHDR_GRE); + if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ + if (err == -EMSGSIZE) { + if (skb->protocol == htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + } + + goto tx_err; + } + return NETDEV_TX_OK; + +tx_err: + stats = &t->dev->stats; + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) { struct net_device *dev = t->dev; @@ -1079,6 +1388,10 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + if (tunnel->parms.collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } ip6gre_tnl_init_features(dev); return 0; @@ -1095,6 +1408,9 @@ static int ip6gre_tunnel_init(struct net_device *dev) tunnel = netdev_priv(dev); + if (tunnel->parms.collect_md) + return 0; + memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); @@ -1117,7 +1433,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) dev_hold(dev); } - static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, @@ -1132,7 +1447,8 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &ip6gre_link_ops || - dev->rtnl_link_ops == &ip6gre_tap_ops) + dev->rtnl_link_ops == &ip6gre_tap_ops || + dev->rtnl_link_ops == &ip6erspan_tap_ops) unregister_netdevice_queue(dev, head); for (prio = 0; prio < 4; prio++) { @@ -1254,6 +1570,70 @@ out: return ip6gre_tunnel_validate(tb, data, extack); } +static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + __be16 flags = 0; + int ret, ver = 0; + + if (!data) + return 0; + + ret = ip6gre_tap_validate(tb, data, extack); + if (ret) + return ret; + + /* ERSPAN should only have GRE sequence and key flag */ + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (!data[IFLA_GRE_COLLECT_METADATA] && + flags != (GRE_SEQ | GRE_KEY)) + return -EINVAL; + + /* ERSPAN Session ID only has 10-bit. Since we reuse + * 32-bit key field as ID, check it's range. + */ + if (data[IFLA_GRE_IKEY] && + (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) + return -EINVAL; + + if (data[IFLA_GRE_OKEY] && + (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) + return -EINVAL; + + if (data[IFLA_GRE_ERSPAN_VER]) { + ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); + if (ver != 1 && ver != 2) + return -EINVAL; + } + + if (ver == 1) { + if (data[IFLA_GRE_ERSPAN_INDEX]) { + u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + + if (index & ~INDEX_MASK) + return -EINVAL; + } + } else if (ver == 2) { + if (data[IFLA_GRE_ERSPAN_DIR]) { + u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); + + if (dir & ~(DIR_MASK >> DIR_OFFSET)) + return -EINVAL; + } + + if (data[IFLA_GRE_ERSPAN_HWID]) { + u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); + + if (hwid & ~(HWID_MASK >> HWID_OFFSET)) + return -EINVAL; + } + } + + return 0; +} static void ip6gre_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) @@ -1300,6 +1680,22 @@ static void ip6gre_netlink_parms(struct nlattr *data[], if (data[IFLA_GRE_FWMARK]) parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); + + if (data[IFLA_GRE_COLLECT_METADATA]) + parms->collect_md = true; + + if (data[IFLA_GRE_ERSPAN_VER]) + parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); + + if (parms->erspan_ver == 1) { + if (data[IFLA_GRE_ERSPAN_INDEX]) + parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + } else if (parms->erspan_ver == 2) { + if (data[IFLA_GRE_ERSPAN_DIR]) + parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); + if (data[IFLA_GRE_ERSPAN_HWID]) + parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); + } } static int ip6gre_tap_init(struct net_device *dev) @@ -1326,6 +1722,59 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; +static int ip6erspan_tap_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + int t_hlen; + int ret; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + tunnel->net = dev_net(dev); + strcpy(tunnel->parms.name, dev->name); + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); + if (ret) { + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; + } + + tunnel->tun_hlen = 8; + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + + erspan_hdr_len(tunnel->parms.erspan_ver); + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; + if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + tunnel = netdev_priv(dev); + ip6gre_tnl_link_config(tunnel, 1); + + return 0; +} + +static const struct net_device_ops ip6erspan_netdev_ops = { + .ndo_init = ip6erspan_tap_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6erspan_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ip6_tnl_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip6_tnl_get_iflink, +}; + static void ip6gre_tap_setup(struct net_device *dev) { @@ -1396,8 +1845,13 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, ip6gre_netlink_parms(data, &nt->parms); - if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) - return -EEXIST; + if (nt->parms.collect_md) { + if (rtnl_dereference(ign->collect_md_tun)) + return -EEXIST; + } else { + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + } if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); @@ -1500,8 +1954,12 @@ static size_t ip6gre_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_GRE_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_GRE_COLLECT_METADATA */ + nla_total_size(0) + /* IFLA_GRE_FWMARK */ nla_total_size(4) + + /* IFLA_GRE_ERSPAN_INDEX */ + nla_total_size(4) + 0; } @@ -1523,7 +1981,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) || - nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark)) + nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) || + nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, @@ -1536,6 +1995,24 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) t->encap.flags)) goto nla_put_failure; + if (p->collect_md) { + if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) + goto nla_put_failure; + } + + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver)) + goto nla_put_failure; + + if (p->erspan_ver == 1) { + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) + goto nla_put_failure; + } else if (p->erspan_ver == 2) { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid)) + goto nla_put_failure; + } + return 0; nla_put_failure: @@ -1558,9 +2035,28 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, + [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, + [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 }, + [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 }, + [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 }, }; +static void ip6erspan_tap_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->netdev_ops = &ip6erspan_netdev_ops; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; + + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); +} + static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .kind = "ip6gre", .maxtype = IFLA_GRE_MAX, @@ -1590,6 +2086,20 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { .get_link_net = ip6_tnl_get_link_net, }; +static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = { + .kind = "ip6erspan", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6erspan_tap_setup, + .validate = ip6erspan_tap_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, +}; + /* * And now the modules code and kernel interface. */ @@ -1618,9 +2128,15 @@ static int __init ip6gre_init(void) if (err < 0) goto tap_ops_failed; + err = rtnl_link_register(&ip6erspan_tap_ops); + if (err < 0) + goto erspan_link_failed; + out: return err; +erspan_link_failed: + rtnl_link_unregister(&ip6gre_tap_ops); tap_ops_failed: rtnl_link_unregister(&ip6gre_link_ops); rtnl_link_failed: @@ -1634,6 +2150,7 @@ static void __exit ip6gre_fini(void) { rtnl_link_unregister(&ip6gre_tap_ops); rtnl_link_unregister(&ip6gre_link_ops); + rtnl_link_unregister(&ip6erspan_tap_ops); inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); unregister_pernet_device(&ip6gre_net_ops); } @@ -1645,4 +2162,5 @@ MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); MODULE_DESCRIPTION("GRE over IPv6 tunneling device"); MODULE_ALIAS_RTNL_LINK("ip6gre"); MODULE_ALIAS_RTNL_LINK("ip6gretap"); +MODULE_ALIAS_RTNL_LINK("ip6erspan"); MODULE_ALIAS_NETDEV("ip6gre0"); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3763dc01e374..997c7f19ad62 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -138,6 +138,14 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s return ret; } +#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) + /* Policy lookup after SNAT yielded a new policy */ + if (skb_dst(skb)->xfrm) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(net, sk, skb); + } +#endif + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) @@ -370,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } -static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) { unsigned int mtu; struct inet6_dev *idev; @@ -390,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) return mtu; } +EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward); static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { @@ -1209,7 +1218,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path); + READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; @@ -1217,7 +1226,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (mtu < IPV6_MIN_MTU) return -EINVAL; cork->base.fragsize = mtu; - if (dst_allfrag(rt->dst.path)) + if (dst_allfrag(xfrm_dst_path(&rt->dst))) cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 1ee5584c3555..4b15fe928278 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -860,7 +860,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, struct metadata_dst *tun_dst, bool log_ecn_err) { - return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate, + return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate, log_ecn_err); } EXPORT_SYMBOL(ip6_tnl_rcv); @@ -978,6 +978,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, int ret = 0; struct net *net = t->net; + if (t->parms.collect_md) + return 1; + if ((p->flags & IP6_TNL_F_CAP_XMIT) || ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 8c184f84f353..fa3ae1cb50d3 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -626,6 +626,7 @@ static void vti6_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; + struct net_device *tdev = NULL; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); @@ -638,6 +639,25 @@ static void vti6_link_config(struct ip6_tnl *t) dev->flags |= IFF_POINTOPOINT; else dev->flags &= ~IFF_POINTOPOINT; + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + int strict = (ipv6_addr_type(&p->raddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); + struct rt6_info *rt = rt6_lookup(t->net, + &p->raddr, &p->laddr, + p->link, strict); + + if (rt) + tdev = rt->dst.dev; + ip6_rt_put(rt); + } + + if (!tdev && p->link) + tdev = __dev_get_by_index(t->net, p->link); + + if (tdev) + dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len, + IPV6_MIN_MTU); } /** diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index a2e1a864eb46..9f6cace9c817 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -477,7 +477,6 @@ static int ip6mr_vif_open(struct inode *inode, struct file *file) } static const struct file_operations ip6mr_vif_fops = { - .owner = THIS_MODULE, .open = ip6mr_vif_open, .read = seq_read, .llseek = seq_lseek, @@ -495,6 +494,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) return ERR_PTR(-ENOENT); it->mrt = mrt; + it->cache = NULL; return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) : SEQ_START_TOKEN; } @@ -609,7 +609,6 @@ static int ipmr_mfc_open(struct inode *inode, struct file *file) } static const struct file_operations ip6mr_mfc_fops = { - .owner = THIS_MODULE, .open = ipmr_mfc_open, .read = seq_read, .llseek = seq_lseek, @@ -1425,10 +1424,13 @@ int __init ip6_mr_init(void) goto add_proto_fail; } #endif - rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, - ip6mr_rtm_dumproute, 0); - return 0; + err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE, + NULL, ip6mr_rtm_dumproute, 0); + if (err == 0) + return 0; + #ifdef CONFIG_IPV6_PIMSM_V2 + inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); add_proto_fail: unregister_netdevice_notifier(&ip6_mr_notifier); #endif diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 844642682b83..6a5d0e39bb87 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1655,8 +1655,6 @@ static void mld_sendpack(struct sk_buff *skb) if (err) goto err_out; - payload_len = skb->len; - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, net->ipv6.igmp_sk, skb, NULL, skb->dev, dst_output); @@ -2758,7 +2756,6 @@ static int igmp6_mc_seq_open(struct inode *inode, struct file *file) } static const struct file_operations igmp6_mc_seq_fops = { - .owner = THIS_MODULE, .open = igmp6_mc_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -2913,7 +2910,6 @@ static int igmp6_mcf_seq_open(struct inode *inode, struct file *file) } static const struct file_operations igmp6_mcf_seq_fops = { - .owner = THIS_MODULE, .open = igmp6_mcf_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b3cea200c85e..f61a5b613b52 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -566,6 +566,11 @@ static void ndisc_send_unsol_na(struct net_device *dev) read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { + /* skip tentative addresses until dad completes */ + if (ifa->flags & IFA_F_TENTATIVE && + !(ifa->flags & IFA_F_OPTIMISTIC)) + continue; + ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 39970e212ad5..d95ceca7ff8f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -68,32 +68,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) } EXPORT_SYMBOL(ip6_route_me_harder); -/* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - */ - -struct ip6_rt_info { - struct in6_addr daddr; - struct in6_addr saddr; - u_int32_t mark; -}; - -static void nf_ip6_saveroute(const struct sk_buff *skb, - struct nf_queue_entry *entry) -{ - struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct ipv6hdr *iph = ipv6_hdr(skb); - - rt_info->daddr = iph->daddr; - rt_info->saddr = iph->saddr; - rt_info->mark = skb->mark; - } -} - -static int nf_ip6_reroute(struct net *net, struct sk_buff *skb, +static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); @@ -103,7 +78,7 @@ static int nf_ip6_reroute(struct net *net, struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(net, skb); + return ip6_route_me_harder(entry->state.net, skb); } return 0; } @@ -190,25 +165,19 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, }; static const struct nf_ipv6_ops ipv6ops = { - .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input, - .fragment = ip6_fragment -}; - -static const struct nf_afinfo nf_ip6_afinfo = { - .family = AF_INET6, + .chk_addr = ipv6_chk_addr, + .route_input = ip6_route_input, + .fragment = ip6_fragment, .checksum = nf_ip6_checksum, .checksum_partial = nf_ip6_checksum_partial, .route = nf_ip6_route, - .saveroute = nf_ip6_saveroute, .reroute = nf_ip6_reroute, - .route_key_size = sizeof(struct ip6_rt_info), }; int __init ipv6_netfilter_init(void) { RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); - return nf_register_afinfo(&nf_ip6_afinfo); + return 0; } /* This can be called from inet6_init() on errors, so it cannot @@ -217,5 +186,4 @@ int __init ipv6_netfilter_init(void) void ipv6_netfilter_fini(void) { RCU_INIT_POINTER(nf_ipv6_ops, NULL); - nf_unregister_afinfo(&nf_ip6_afinfo); } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6acb2eecd986..4a634b7a2c80 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -71,6 +71,15 @@ config NFT_FIB_IPV6 endif # NF_TABLES_IPV6 endif # NF_TABLES +config NF_FLOW_TABLE_IPV6 + tristate "Netfilter flow table IPv6 module" + depends on NF_CONNTRACK && NF_TABLES + select NF_FLOW_TABLE + help + This option adds the flow table IPv6 support. + + To compile it as a module, choose M here. + config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK @@ -232,6 +241,15 @@ config IP6_NF_MATCH_RT To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_SRH + tristate '"srh" Segment Routing header match support' + depends on NETFILTER_ADVANCED + help + srh matching allows you to match packets based on the segment + routing header of the packet. + + To compile it as a module, choose M here. If unsure, say N. + # The targets config IP6_NF_TARGET_HL tristate '"HL" hoplimit target support' diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index c6ee0cdd0ba9..d984057b8395 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o +# flow table support +obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o + # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o @@ -54,6 +57,7 @@ obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o +obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 66a8c69a3db4..af4c917e0836 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -986,9 +986,8 @@ static int get_info(struct net *net, void __user *user, if (compat) xt_compat_lock(AF_INET6); #endif - t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), - "ip6table_%s", name); - if (t) { + t = xt_request_find_table_lock(net, AF_INET6, name); + if (!IS_ERR(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1018,7 +1017,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = -ENOENT; + ret = PTR_ERR(t); #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(AF_INET6); @@ -1044,7 +1043,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); - if (t) { + if (!IS_ERR(t)) { struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1055,7 +1054,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); return ret; } @@ -1078,10 +1077,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), - "ip6table_%s", name); - if (!t) { - ret = -ENOENT; + t = xt_request_find_table_lock(net, AF_INET6, name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } @@ -1194,8 +1192,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); - if (!t) { - ret = -ENOENT; + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free; } @@ -1631,7 +1629,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); @@ -1645,7 +1643,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); xt_compat_unlock(AF_INET6); return ret; @@ -1949,7 +1947,6 @@ static int __init ip6_tables_init(void) if (ret < 0) goto err5; - pr_info("(C) 2000-2006 Netfilter Core Team\n"); return 0; err5: diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c new file mode 100644 index 000000000000..9642164107ce --- /dev/null +++ b/net/ipv6/netfilter/ip6t_srh.c @@ -0,0 +1,161 @@ +/* Kernel module to match Segment Routing Header (SRH) parameters. */ + +/* Author: + * Ahmed Abdelsalam <amsalam20@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <net/ipv6.h> +#include <net/seg6.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv6/ip6t_srh.h> +#include <linux/netfilter_ipv6/ip6_tables.h> + +/* Test a struct->mt_invflags and a boolean for inequality */ +#define NF_SRH_INVF(ptr, flag, boolean) \ + ((boolean) ^ !!((ptr)->mt_invflags & (flag))) + +static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ip6t_srh *srhinfo = par->matchinfo; + struct ipv6_sr_hdr *srh; + struct ipv6_sr_hdr _srh; + int hdrlen, srhoff = 0; + + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return false; + srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh); + if (!srh) + return false; + + hdrlen = ipv6_optlen(srh); + if (skb->len - srhoff < hdrlen) + return false; + + if (srh->type != IPV6_SRCRT_TYPE_4) + return false; + + if (srh->segments_left > srh->first_segment) + return false; + + /* Next Header matching */ + if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR, + !(srh->nexthdr == srhinfo->next_hdr))) + return false; + + /* Header Extension Length matching */ + if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ, + !(srh->hdrlen == srhinfo->hdr_len))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_LEN_GT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT, + !(srh->hdrlen > srhinfo->hdr_len))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_LEN_LT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT, + !(srh->hdrlen < srhinfo->hdr_len))) + return false; + + /* Segments Left matching */ + if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ, + !(srh->segments_left == srhinfo->segs_left))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT, + !(srh->segments_left > srhinfo->segs_left))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT, + !(srh->segments_left < srhinfo->segs_left))) + return false; + + /** + * Last Entry matching + * Last_Entry field was introduced in revision 6 of the SRH draft. + * It was called First_Segment in the previous revision + */ + if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ, + !(srh->first_segment == srhinfo->last_entry))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_LAST_GT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT, + !(srh->first_segment > srhinfo->last_entry))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_LAST_LT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT, + !(srh->first_segment < srhinfo->last_entry))) + return false; + + /** + * Tag matchig + * Tag field was introduced in revision 6 of the SRH draft. + */ + if (srhinfo->mt_flags & IP6T_SRH_TAG) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG, + !(srh->tag == srhinfo->tag))) + return false; + return true; +} + +static int srh_mt6_check(const struct xt_mtchk_param *par) +{ + const struct ip6t_srh *srhinfo = par->matchinfo; + + if (srhinfo->mt_flags & ~IP6T_SRH_MASK) { + pr_err("unknown srh match flags %X\n", srhinfo->mt_flags); + return -EINVAL; + } + + if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) { + pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags); + return -EINVAL; + } + + return 0; +} + +static struct xt_match srh_mt6_reg __read_mostly = { + .name = "srh", + .family = NFPROTO_IPV6, + .match = srh_mt6, + .matchsize = sizeof(struct ip6t_srh), + .checkentry = srh_mt6_check, + .me = THIS_MODULE, +}; + +static int __init srh_mt6_init(void) +{ + return xt_register_match(&srh_mt6_reg); +} + +static void __exit srh_mt6_exit(void) +{ + xt_unregister_match(&srh_mt6_reg); +} + +module_init(srh_mt6_init); +module_exit(srh_mt6_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: IPv6 Segment Routing Header match"); +MODULE_AUTHOR("Ahmed Abdelsalam <amsalam20@gmail.com>"); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 2b1a9dcdbcb3..b0524b18c4fb 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) u_int8_t hop_limit; u_int32_t flowlabel, mark; int err; -#if 0 - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - net_warn_ratelimited("ip6t_hook: happy cracking\n"); - return NF_ACCEPT; - } -#endif /* save source/dest address, mark, hoplimit, flowlabel, priority, */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 991512576c8c..47306e45a80a 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { .hook = ip6table_nat_in, .pf = NFPROTO_IPV6, + .nat_hook = true, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, }, @@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { .hook = ip6table_nat_out, .pf = NFPROTO_IPV6, + .nat_hook = true, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, }, @@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { .hook = ip6table_nat_local_fn, .pf = NFPROTO_IPV6, + .nat_hook = true, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, }, /* After packet filtering, change source */ { .hook = ip6table_nat_fn, + .nat_hook = true, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index d4bc56443dc1..710fa0806c37 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -3,6 +3,7 @@ * * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> @@ -11,6 +12,10 @@ static int __net_init ip6table_raw_table_init(struct net *net); +static bool raw_before_defrag __read_mostly; +MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); +module_param(raw_before_defrag, bool, 0000); + static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -20,6 +25,15 @@ static const struct xt_table packet_raw = { .table_init = ip6table_raw_table_init, }; +static const struct xt_table packet_raw_before_defrag = { + .name = "raw", + .valid_hooks = RAW_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG, + .table_init = ip6table_raw_table_init, +}; + /* The work comes in here from netfilter.c. */ static unsigned int ip6table_raw_hook(void *priv, struct sk_buff *skb, @@ -33,15 +47,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init ip6table_raw_table_init(struct net *net) { struct ip6t_replace *repl; + const struct xt_table *table = &packet_raw; int ret; + if (raw_before_defrag) + table = &packet_raw_before_defrag; + if (net->ipv6.ip6table_raw) return 0; - repl = ip6t_alloc_initial_table(&packet_raw); + repl = ip6t_alloc_initial_table(table); if (repl == NULL) return -ENOMEM; - ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops, + ret = ip6t_register_table(net, table, repl, rawtable_ops, &net->ipv6.ip6table_raw); kfree(repl); return ret; @@ -62,9 +80,16 @@ static struct pernet_operations ip6table_raw_net_ops = { static int __init ip6table_raw_init(void) { int ret; + const struct xt_table *table = &packet_raw; + + if (raw_before_defrag) { + table = &packet_raw_before_defrag; + + pr_info("Enabling raw table before defrag\n"); + } /* Register hooks */ - rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook); + rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook); if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 3b80a38f62b8..11a313fd9273 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct ipv6hdr)) { - net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); - return NF_ACCEPT; - } return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } @@ -368,7 +363,7 @@ static struct nf_sockopt_ops so_getorigdst6 = { .owner = THIS_MODULE, }; -static struct nf_conntrack_l4proto *builtin_l4proto6[] = { +static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = { &nf_conntrack_l4proto_tcp6, &nf_conntrack_l4proto_udp6, &nf_conntrack_l4proto_icmpv6, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 3ac0d826afc4..2548e2c8aedd 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -27,7 +27,7 @@ #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> #include <net/netfilter/nf_log.h> -static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) { @@ -352,7 +352,7 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.icmpv6.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_ICMPV6, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 977d8900cfd1..ce53dcfda88a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -231,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, if ((unsigned int)end > IPV6_MAXPLEN) { pr_debug("offset is too large.\n"); - return -1; + return -EINVAL; } ecn = ip6_frag_ecn(ipv6_hdr(skb)); @@ -264,7 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, * this case. -DaveM */ pr_debug("end of fragment not rounded to 8 bytes.\n"); - return -1; + return -EPROTO; } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ @@ -358,7 +358,7 @@ found: discard_fq: inet_frag_kill(&fq->q, &nf_frags); err: - return -1; + return -EINVAL; } /* @@ -567,6 +567,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { + u16 savethdr = skb->transport_header; struct net_device *dev = skb->dev; int fhoff, nhoff, ret; struct frag_hdr *fhdr; @@ -600,8 +601,12 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) spin_lock_bh(&fq->q.lock); - if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { - ret = -EINVAL; + ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); + if (ret < 0) { + if (ret == -EPROTO) { + skb->transport_header = savethdr; + ret = 0; + } goto out_unlock; } diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index b326da59257f..c87b48359e8f 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -63,6 +63,9 @@ static unsigned int ipv6_defrag(void *priv, /* Previously seen (loopback)? */ if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; + + if (skb->_nfct == IP_CT_UNTRACKED) + return NF_ACCEPT; #endif err = nf_ct_frag6_gather(state->net, skb, diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c new file mode 100644 index 000000000000..fff21602875a --- /dev/null +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -0,0 +1,277 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/rhashtable.h> +#include <linux/ipv6.h> +#include <linux/netdevice.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/neighbour.h> +#include <net/netfilter/nf_flow_table.h> +#include <net/netfilter/nf_tables.h> +/* For layer 4 checksum field offset. */ +#include <linux/tcp.h> +#include <linux/udp.h> + +static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, + struct in6_addr *addr, + struct in6_addr *new_addr) +{ + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || + skb_try_make_writable(skb, thoff + sizeof(*tcph))) + return -1; + + tcph = (void *)(skb_network_header(skb) + thoff); + inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, + new_addr->s6_addr32, true); + + return 0; +} + +static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, + struct in6_addr *addr, + struct in6_addr *new_addr) +{ + struct udphdr *udph; + + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || + skb_try_make_writable(skb, thoff + sizeof(*udph))) + return -1; + + udph = (void *)(skb_network_header(skb) + thoff); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, + new_addr->s6_addr32, true); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + return 0; +} + +static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, struct in6_addr *addr, + struct in6_addr *new_addr) +{ + switch (ip6h->nexthdr) { + case IPPROTO_TCP: + if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + case IPPROTO_UDP: + if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + } + + return 0; +} + +static int nf_flow_snat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + struct in6_addr addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = ip6h->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; + ip6h->saddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = ip6h->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; + ip6h->daddr = new_addr; + break; + default: + return -1; + } + + return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); +} + +static int nf_flow_dnat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + struct in6_addr addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = ip6h->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; + ip6h->daddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = ip6h->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; + ip6h->saddr = new_addr; + break; + default: + return -1; + } + + return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); +} + +static int nf_flow_nat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, + enum flow_offload_tuple_dir dir) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + unsigned int thoff = sizeof(*ip6h); + + if (flow->flags & FLOW_OFFLOAD_SNAT && + (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || + nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) + return -1; + if (flow->flags & FLOW_OFFLOAD_DNAT && + (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || + nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) + return -1; + + return 0; +} + +static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + struct ipv6hdr *ip6h; + unsigned int thoff; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return -1; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_TCP && + ip6h->nexthdr != IPPROTO_UDP) + return -1; + + thoff = sizeof(*ip6h); + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return -1; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v6 = ip6h->saddr; + tuple->dst_v6 = ip6h->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET6; + tuple->l4proto = ip6h->nexthdr; + tuple->iifidx = dev->ifindex; + + return 0; +} + +/* Based on ip_exceeds_mtu(). */ +static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + return false; + + return true; +} + +static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt) +{ + u32 mtu; + + mtu = ip6_dst_mtu_forward(&rt->dst); + if (__nf_flow_exceeds_mtu(skb, mtu)) + return true; + + return false; +} + +unsigned int +nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table = priv; + struct flow_offload_tuple tuple = {}; + enum flow_offload_tuple_dir dir; + struct flow_offload *flow; + struct net_device *outdev; + struct in6_addr *nexthop; + struct ipv6hdr *ip6h; + struct rt6_info *rt; + + if (skb->protocol != htons(ETH_P_IPV6)) + return NF_ACCEPT; + + if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) + return NF_ACCEPT; + + tuplehash = flow_offload_lookup(flow_table, &tuple); + if (tuplehash == NULL) + return NF_ACCEPT; + + outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); + if (!outdev) + return NF_ACCEPT; + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + + rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; + if (unlikely(nf_flow_exceeds_mtu(skb, rt))) + return NF_ACCEPT; + + if (skb_try_make_writable(skb, sizeof(*ip6h))) + return NF_DROP; + + if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && + nf_flow_nat_ipv6(flow, skb, dir) < 0) + return NF_DROP; + + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + ip6h = ipv6_hdr(skb); + ip6h->hop_limit--; + + skb->dev = outdev; + nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); + neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); + + return NF_STOLEN; +} +EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); + +static struct nf_flowtable_type flowtable_ipv6 = { + .family = NFPROTO_IPV6, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, + .hook = nf_flow_offload_ipv6_hook, + .owner = THIS_MODULE, +}; + +static int __init nf_flow_ipv6_module_init(void) +{ + nft_register_flowtable_type(&flowtable_ipv6); + + return 0; +} + +static void __exit nf_flow_ipv6_module_exit(void) +{ + nft_unregister_flowtable_type(&flowtable_ipv6); +} + +module_init(nf_flow_ipv6_module_init); +module_exit(nf_flow_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NF_FLOWTABLE(AF_INET6); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 1d2fb9267d6f..bed57ee65f7b 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -369,10 +369,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, #endif unsigned int ret; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct ipv6hdr)) - return NF_ACCEPT; - ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && @@ -408,10 +404,6 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, unsigned int ret; int err; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct ipv6hdr)) - return NF_ACCEPT; - ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index d6e4ba5de916..17e03589331c 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -22,68 +22,12 @@ static unsigned int nft_do_chain_ipv6(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv6_output(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - if (unlikely(skb->len < sizeof(struct ipv6hdr))) { - if (net_ratelimit()) - pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " - "packet\n"); - return NF_ACCEPT; - } - - return nft_do_chain_ipv6(priv, skb, state); -} - -struct nft_af_info nft_af_ipv6 __read_mostly = { - .family = NFPROTO_IPV6, - .nhooks = NF_INET_NUMHOOKS, - .owner = THIS_MODULE, - .nops = 1, - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, - [NF_INET_LOCAL_OUT] = nft_ipv6_output, - [NF_INET_FORWARD] = nft_do_chain_ipv6, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, - }, -}; -EXPORT_SYMBOL_GPL(nft_af_ipv6); - -static int nf_tables_ipv6_init_net(struct net *net) -{ - net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); - if (net->nft.ipv6 == NULL) - return -ENOMEM; - - memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6)); - - if (nft_register_afinfo(net, net->nft.ipv6) < 0) - goto err; - - return 0; -err: - kfree(net->nft.ipv6); - return -ENOMEM; -} - -static void nf_tables_ipv6_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.ipv6); - kfree(net->nft.ipv6); -} - -static struct pernet_operations nf_tables_ipv6_net_ops = { - .init = nf_tables_ipv6_init_net, - .exit = nf_tables_ipv6_exit_net, -}; - static const struct nf_chain_type filter_ipv6 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, @@ -94,26 +38,22 @@ static const struct nf_chain_type filter_ipv6 = { (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, + [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, + }, }; static int __init nf_tables_ipv6_init(void) { - int ret; - - ret = nft_register_chain_type(&filter_ipv6); - if (ret < 0) - return ret; - - ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); - if (ret < 0) - nft_unregister_chain_type(&filter_ipv6); - - return ret; + return nft_register_chain_type(&filter_ipv6); } static void __exit nf_tables_ipv6_exit(void) { - unregister_pernet_subsys(&nf_tables_ipv6_net_ops); nft_unregister_chain_type(&filter_ipv6); } @@ -122,4 +62,4 @@ module_exit(nf_tables_ipv6_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_FAMILY(AF_INET6); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter"); diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 443cd306c0b0..73fe2bd13fcf 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index f2727475895e..11d3c3b9aa18 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(void *priv, u32 mark, flowlabel; int err; - nft_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 54b5899543ef..cc5174c7254c 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, { const struct net_device *dev = NULL; const struct nf_ipv6_ops *v6ops; - const struct nf_afinfo *afinfo; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { @@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, }; u32 ret = 0; - afinfo = nf_get_afinfo(NFPROTO_IPV6); - if (!afinfo) + v6ops = nf_get_ipv6_ops(); + if (!v6ops) return RTN_UNREACHABLE; if (priv->flags & NFTA_FIB_F_IIF) @@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); - v6ops = nf_get_ipv6_ops(); - if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt, - flowi6_to_flowi(&fl6), false); + route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt, + flowi6_to_flowi(&fl6), false); if (route_err) goto err; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index e88bcb8ff0fd..b67814242f78 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -58,7 +58,6 @@ static int sockstat6_seq_open(struct inode *inode, struct file *file) } static const struct file_operations sockstat6_seq_fops = { - .owner = THIS_MODULE, .open = sockstat6_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -248,7 +247,6 @@ static int snmp6_seq_open(struct inode *inode, struct file *file) } static const struct file_operations snmp6_seq_fops = { - .owner = THIS_MODULE, .open = snmp6_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -274,7 +272,6 @@ static int snmp6_dev_seq_open(struct inode *inode, struct file *file) } static const struct file_operations snmp6_dev_seq_fops = { - .owner = THIS_MODULE, .open = snmp6_dev_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 761a473a07c5..ddda7eb3c623 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1308,7 +1308,6 @@ static int raw6_seq_open(struct inode *inode, struct file *file) } static const struct file_operations raw6_seq_fops = { - .owner = THIS_MODULE, .open = raw6_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0458b761f3c5..fb2d251c0500 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -186,7 +186,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) { - return dst_metrics_write_ptr(rt->dst.from); + return dst_metrics_write_ptr(&rt->from->dst); } static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) @@ -391,7 +391,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct rt6_exception_bucket *bucket; - struct dst_entry *from = dst->from; + struct rt6_info *from = rt->from; struct inet6_dev *idev; dst_destroy_metrics_generic(dst); @@ -409,8 +409,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) kfree(bucket); } - dst->from = NULL; - dst_release(from); + rt->from = NULL; + dst_release(&from->dst); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -443,9 +443,9 @@ static bool rt6_check_expired(const struct rt6_info *rt) if (rt->rt6i_flags & RTF_EXPIRES) { if (time_after(jiffies, rt->dst.expires)) return true; - } else if (rt->dst.from) { + } else if (rt->from) { return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || - rt6_check_expired((struct rt6_info *)rt->dst.from); + rt6_check_expired(rt->from); } return false; } @@ -455,7 +455,6 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, int strict) { struct rt6_info *sibling, *next_sibling; - int route_choosen; /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. @@ -463,26 +462,19 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(fl6, NULL); - route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1); - /* Don't change the route, if route_choosen == 0 - * (siblings does not include ourself) - */ - if (route_choosen) - list_for_each_entry_safe(sibling, next_sibling, - &match->rt6i_siblings, rt6i_siblings) { - route_choosen--; - if (route_choosen == 0) { - struct inet6_dev *idev = sibling->rt6i_idev; - - if (!netif_carrier_ok(sibling->dst.dev) && - idev->cnf.ignore_routes_with_linkdown) - break; - if (rt6_score_route(sibling, oif, strict) < 0) - break; - match = sibling; - break; - } - } + if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) + return match; + + list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings, + rt6i_siblings) { + if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound)) + continue; + if (rt6_score_route(sibling, oif, strict) < 0) + break; + match = sibling; + break; + } + return match; } @@ -499,12 +491,15 @@ static inline struct rt6_info *rt6_device_match(struct net *net, struct rt6_info *local = NULL; struct rt6_info *sprt; - if (!oif && ipv6_addr_any(saddr)) - goto out; + if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD)) + return rt; - for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) { + for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) { struct net_device *dev = sprt->dst.dev; + if (sprt->rt6i_nh_flags & RTNH_F_DEAD) + continue; + if (oif) { if (dev->ifindex == oif) return sprt; @@ -533,8 +528,8 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (flags & RT6_LOOKUP_F_IFACE) return net->ipv6.ip6_null_entry; } -out: - return rt; + + return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -679,10 +674,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, int m; bool match_do_rr = false; struct inet6_dev *idev = rt->rt6i_idev; - struct net_device *dev = rt->dst.dev; - if (dev && !netif_carrier_ok(dev) && - idev->cnf.ignore_routes_with_linkdown && + if (rt->rt6i_nh_flags & RTNH_F_DEAD) + goto out; + + if (idev->cnf.ignore_routes_with_linkdown && + rt->rt6i_nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; @@ -721,7 +718,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; cont = NULL; - for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) { + for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) { if (rt->rt6i_metric != metric) { cont = rt; break; @@ -731,7 +728,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, } for (rt = leaf; rt && rt != rr_head; - rt = rcu_dereference(rt->dst.rt6_next)) { + rt = rcu_dereference(rt->rt6_next)) { if (rt->rt6i_metric != metric) { cont = rt; break; @@ -743,7 +740,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, if (match || !cont) return match; - for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next)) + for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next)) match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; @@ -781,7 +778,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, &do_rr); if (do_rr) { - struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next); + struct rt6_info *next = rcu_dereference(rt0->rt6_next); /* no entries matched; do round-robin */ if (!next || next->rt6i_metric != rt0->rt6i_metric) @@ -1054,7 +1051,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, */ if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = (struct rt6_info *)ort->dst.from; + ort = ort->from; rcu_read_lock(); dev = ip6_rt_get_dev_rcu(ort); @@ -1274,7 +1271,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, /* ort can't be a cache or pcpu route */ if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = (struct rt6_info *)ort->dst.from; + ort = ort->from; WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); spin_lock_bh(&rt6_exception_lock); @@ -1346,7 +1343,9 @@ out: /* Update fn->fn_sernum to invalidate all cached dst */ if (!err) { + spin_lock_bh(&ort->rt6i_table->tb6_lock); fib6_update_sernum(ort); + spin_unlock_bh(&ort->rt6i_table->tb6_lock); fib6_force_start_gc(net); } @@ -1415,8 +1414,8 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, /* Remove the passed in cached rt from the hash table that contains it */ int rt6_remove_exception_rt(struct rt6_info *rt) { - struct rt6_info *from = (struct rt6_info *)rt->dst.from; struct rt6_exception_bucket *bucket; + struct rt6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; int err; @@ -1460,8 +1459,8 @@ int rt6_remove_exception_rt(struct rt6_info *rt) */ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { - struct rt6_info *from = (struct rt6_info *)rt->dst.from; struct rt6_exception_bucket *bucket; + struct rt6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; @@ -1586,12 +1585,19 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when * expired, independently from their aging, as per RFC 8201 section 4 */ - if (!(rt->rt6i_flags & RTF_EXPIRES) && - time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { - RT6_TRACE("aging clone %p\n", rt); + if (!(rt->rt6i_flags & RTF_EXPIRES)) { + if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { + RT6_TRACE("aging clone %p\n", rt); + rt6_remove_exception(bucket, rt6_ex); + return; + } + } else if (time_after(jiffies, rt->dst.expires)) { + RT6_TRACE("purging expired route %p\n", rt); rt6_remove_exception(bucket, rt6_ex); return; - } else if (rt->rt6i_flags & RTF_GATEWAY) { + } + + if (rt->rt6i_flags & RTF_GATEWAY) { struct neighbour *neigh; __u8 neigh_flags = 0; @@ -1606,11 +1612,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, rt6_remove_exception(bucket, rt6_ex); return; } - } else if (__rt6_check_expired(rt)) { - RT6_TRACE("purging expired route %p\n", rt); - rt6_remove_exception(bucket, rt6_ex); - return; } + gc_args->more++; } @@ -1824,10 +1827,10 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb) if (skb) { ip6_multipath_l3_keys(skb, &hash_keys); - return flow_hash_from_keys(&hash_keys); + return flow_hash_from_keys(&hash_keys) >> 1; } - return get_hash_from_flowi6(fl6); + return get_hash_from_flowi6(fl6) >> 1; } void ip6_route_input(struct sk_buff *skb) @@ -1929,9 +1932,9 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori static void rt6_dst_from_metrics_check(struct rt6_info *rt) { - if (rt->dst.from && - dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from)) - dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); + if (rt->from && + dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst)) + dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true); } static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) @@ -1951,7 +1954,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) { if (!__rt6_check_expired(rt) && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && - rt6_check((struct rt6_info *)(rt->dst.from), cookie)) + rt6_check(rt->from, cookie)) return &rt->dst; else return NULL; @@ -1971,7 +1974,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt6_dst_from_metrics_check(rt); if (rt->rt6i_flags & RTF_PCPU || - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) + (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) return rt6_dst_from_check(rt, cookie); else return rt6_check(rt, cookie); @@ -2154,6 +2157,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { + if (rt->rt6i_nh_flags & RTNH_F_DEAD) + continue; if (rt6_check_expired(rt)) continue; if (rt->dst.error) @@ -2344,7 +2349,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_idev = idev; dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); - /* Add this dst into uncached_list so that rt6_ifdown() can + /* Add this dst into uncached_list so that rt6_disable_ip() can * do proper release of the net_device */ rt6_uncached_list_add(rt); @@ -2439,7 +2444,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc, static struct rt6_info *ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg, - const struct in6_addr *gw_addr) + const struct in6_addr *gw_addr, + u32 tbid, int flags) { struct flowi6 fl6 = { .flowi6_oif = cfg->fc_ifindex, @@ -2448,15 +2454,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE; - table = fib6_get_table(net, cfg->fc_table); + table = fib6_get_table(net, tbid); if (!table) return NULL; if (!ipv6_addr_any(&cfg->fc_prefsrc)) flags |= RT6_LOOKUP_F_HAS_SADDR; + flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); /* if table lookup failed, fall back to full lookup */ @@ -2468,6 +2474,82 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, return rt; } +static int ip6_route_check_nh_onlink(struct net *net, + struct fib6_config *cfg, + struct net_device *dev, + struct netlink_ext_ack *extack) +{ + u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; + const struct in6_addr *gw_addr = &cfg->fc_gateway; + u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; + struct rt6_info *grt; + int err; + + err = 0; + grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); + if (grt) { + if (grt->rt6i_flags & flags || dev != grt->dst.dev) { + NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); + err = -EINVAL; + } + + ip6_rt_put(grt); + } + + return err; +} + +static int ip6_route_check_nh(struct net *net, + struct fib6_config *cfg, + struct net_device **_dev, + struct inet6_dev **idev) +{ + const struct in6_addr *gw_addr = &cfg->fc_gateway; + struct net_device *dev = _dev ? *_dev : NULL; + struct rt6_info *grt = NULL; + int err = -EHOSTUNREACH; + + if (cfg->fc_table) { + int flags = RT6_LOOKUP_F_IFACE; + + grt = ip6_nh_lookup_table(net, cfg, gw_addr, + cfg->fc_table, flags); + if (grt) { + if (grt->rt6i_flags & RTF_GATEWAY || + (dev && dev != grt->dst.dev)) { + ip6_rt_put(grt); + grt = NULL; + } + } + } + + if (!grt) + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); + + if (!grt) + goto out; + + if (dev) { + if (dev != grt->dst.dev) { + ip6_rt_put(grt); + goto out; + } + } else { + *_dev = dev = grt->dst.dev; + *idev = grt->rt6i_idev; + dev_hold(dev); + in6_dev_hold(grt->rt6i_idev); + } + + if (!(grt->rt6i_flags & RTF_GATEWAY)) + err = 0; + + ip6_rt_put(grt); + +out: + return err; +} + static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -2519,6 +2601,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (cfg->fc_metric == 0) cfg->fc_metric = IP6_RT_PRIO_USER; + if (cfg->fc_flags & RTNH_F_ONLINK) { + if (!dev) { + NL_SET_ERR_MSG(extack, + "Nexthop device required for onlink"); + err = -ENODEV; + goto out; + } + + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + } + err = -ENOBUFS; if (cfg->fc_nlinfo.nlh && !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { @@ -2593,6 +2690,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, #endif rt->rt6i_metric = cfg->fc_metric; + rt->rt6i_nh_weight = 1; /* We cannot add true routes via loopback here, they would result in kernel looping; promote them to reject routes @@ -2662,8 +2760,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, rt->rt6i_gateway = *gw_addr; if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { - struct rt6_info *grt = NULL; - /* IPv6 strictly inhibits using not link-local addresses as nexthop address. Otherwise, router will not able to send redirects. @@ -2680,40 +2776,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } - if (cfg->fc_table) { - grt = ip6_nh_lookup_table(net, cfg, gw_addr); - - if (grt) { - if (grt->rt6i_flags & RTF_GATEWAY || - (dev && dev != grt->dst.dev)) { - ip6_rt_put(grt); - grt = NULL; - } - } - } - - if (!grt) - grt = rt6_lookup(net, gw_addr, NULL, - cfg->fc_ifindex, 1); - - err = -EHOSTUNREACH; - if (!grt) - goto out; - if (dev) { - if (dev != grt->dst.dev) { - ip6_rt_put(grt); - goto out; - } + if (cfg->fc_flags & RTNH_F_ONLINK) { + err = ip6_route_check_nh_onlink(net, cfg, dev, + extack); } else { - dev = grt->dst.dev; - idev = grt->rt6i_idev; - dev_hold(dev); - in6_dev_hold(grt->rt6i_idev); + err = ip6_route_check_nh(net, cfg, &dev, &idev); } - if (!(grt->rt6i_flags & RTF_GATEWAY)) - err = 0; - ip6_rt_put(grt); - if (err) goto out; } @@ -2732,6 +2800,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (!dev) goto out; + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + if (!ipv6_addr_any(&cfg->fc_prefsrc)) { if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); @@ -2746,6 +2820,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, rt->rt6i_flags = cfg->fc_flags; install_route: + if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && + !netif_carrier_ok(dev)) + rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; + rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -3056,11 +3134,11 @@ out: static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) { - BUG_ON(from->dst.from); + BUG_ON(from->from); rt->rt6i_flags &= ~RTF_EXPIRES; dst_hold(&from->dst); - rt->dst.from = &from->dst; + rt->from = from; dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); } @@ -3459,37 +3537,249 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) fib6_clean_all(net, fib6_clean_tohost, gateway); } -struct arg_dev_net { - struct net_device *dev; - struct net *net; +struct arg_netdev_event { + const struct net_device *dev; + union { + unsigned int nh_flags; + unsigned long event; + }; }; +static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) +{ + struct rt6_info *iter; + struct fib6_node *fn; + + fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); + iter = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); + while (iter) { + if (iter->rt6i_metric == rt->rt6i_metric && + rt6_qualify_for_ecmp(iter)) + return iter; + iter = rcu_dereference_protected(iter->rt6_next, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); + } + + return NULL; +} + +static bool rt6_is_dead(const struct rt6_info *rt) +{ + if (rt->rt6i_nh_flags & RTNH_F_DEAD || + (rt->rt6i_nh_flags & RTNH_F_LINKDOWN && + rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) + return true; + + return false; +} + +static int rt6_multipath_total_weight(const struct rt6_info *rt) +{ + struct rt6_info *iter; + int total = 0; + + if (!rt6_is_dead(rt)) + total += rt->rt6i_nh_weight; + + list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { + if (!rt6_is_dead(iter)) + total += iter->rt6i_nh_weight; + } + + return total; +} + +static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total) +{ + int upper_bound = -1; + + if (!rt6_is_dead(rt)) { + *weight += rt->rt6i_nh_weight; + upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, + total) - 1; + } + atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); +} + +static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) +{ + struct rt6_info *iter; + int weight = 0; + + rt6_upper_bound_set(rt, &weight, total); + + list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + rt6_upper_bound_set(iter, &weight, total); +} + +void rt6_multipath_rebalance(struct rt6_info *rt) +{ + struct rt6_info *first; + int total; + + /* In case the entire multipath route was marked for flushing, + * then there is no need to rebalance upon the removal of every + * sibling route. + */ + if (!rt->rt6i_nsiblings || rt->should_flush) + return; + + /* During lookup routes are evaluated in order, so we need to + * make sure upper bounds are assigned from the first sibling + * onwards. + */ + first = rt6_multipath_first_sibling(rt); + if (WARN_ON_ONCE(!first)) + return; + + total = rt6_multipath_total_weight(first); + rt6_multipath_upper_bound_set(first, total); +} + +static int fib6_ifup(struct rt6_info *rt, void *p_arg) +{ + const struct arg_netdev_event *arg = p_arg; + const struct net *net = dev_net(arg->dev); + + if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { + rt->rt6i_nh_flags &= ~arg->nh_flags; + fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); + rt6_multipath_rebalance(rt); + } + + return 0; +} + +void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) +{ + struct arg_netdev_event arg = { + .dev = dev, + { + .nh_flags = nh_flags, + }, + }; + + if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev)) + arg.nh_flags |= RTNH_F_LINKDOWN; + + fib6_clean_all(dev_net(dev), fib6_ifup, &arg); +} + +static bool rt6_multipath_uses_dev(const struct rt6_info *rt, + const struct net_device *dev) +{ + struct rt6_info *iter; + + if (rt->dst.dev == dev) + return true; + list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + if (iter->dst.dev == dev) + return true; + + return false; +} + +static void rt6_multipath_flush(struct rt6_info *rt) +{ + struct rt6_info *iter; + + rt->should_flush = 1; + list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + iter->should_flush = 1; +} + +static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, + const struct net_device *down_dev) +{ + struct rt6_info *iter; + unsigned int dead = 0; + + if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) + dead++; + list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + if (iter->dst.dev == down_dev || + iter->rt6i_nh_flags & RTNH_F_DEAD) + dead++; + + return dead; +} + +static void rt6_multipath_nh_flags_set(struct rt6_info *rt, + const struct net_device *dev, + unsigned int nh_flags) +{ + struct rt6_info *iter; + + if (rt->dst.dev == dev) + rt->rt6i_nh_flags |= nh_flags; + list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + if (iter->dst.dev == dev) + iter->rt6i_nh_flags |= nh_flags; +} + /* called with write lock held for table with rt */ -static int fib6_ifdown(struct rt6_info *rt, void *arg) +static int fib6_ifdown(struct rt6_info *rt, void *p_arg) { - const struct arg_dev_net *adn = arg; - const struct net_device *dev = adn->dev; + const struct arg_netdev_event *arg = p_arg; + const struct net_device *dev = arg->dev; + const struct net *net = dev_net(dev); - if ((rt->dst.dev == dev || !dev) && - rt != adn->net->ipv6.ip6_null_entry && - (rt->rt6i_nsiblings == 0 || - (dev && netdev_unregistering(dev)) || - !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) - return -1; + if (rt == net->ipv6.ip6_null_entry) + return 0; + + switch (arg->event) { + case NETDEV_UNREGISTER: + return rt->dst.dev == dev ? -1 : 0; + case NETDEV_DOWN: + if (rt->should_flush) + return -1; + if (!rt->rt6i_nsiblings) + return rt->dst.dev == dev ? -1 : 0; + if (rt6_multipath_uses_dev(rt, dev)) { + unsigned int count; + + count = rt6_multipath_dead_count(rt, dev); + if (rt->rt6i_nsiblings + 1 == count) { + rt6_multipath_flush(rt); + return -1; + } + rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | + RTNH_F_LINKDOWN); + fib6_update_sernum(rt); + rt6_multipath_rebalance(rt); + } + return -2; + case NETDEV_CHANGE: + if (rt->dst.dev != dev || + rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) + break; + rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; + rt6_multipath_rebalance(rt); + break; + } return 0; } -void rt6_ifdown(struct net *net, struct net_device *dev) +void rt6_sync_down_dev(struct net_device *dev, unsigned long event) { - struct arg_dev_net adn = { + struct arg_netdev_event arg = { .dev = dev, - .net = net, + { + .event = event, + }, }; - fib6_clean_all(net, fib6_ifdown, &adn); - if (dev) - rt6_uncached_list_flush_dev(net, dev); + fib6_clean_all(dev_net(dev), fib6_ifdown, &arg); +} + +void rt6_disable_ip(struct net_device *dev, unsigned long event) +{ + rt6_sync_down_dev(dev, event); + rt6_uncached_list_flush_dev(dev_net(dev), dev); + neigh_ifdown(&nd_tbl, dev); } struct rt6_mtu_change_arg { @@ -3603,6 +3893,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_flags & RTM_F_CLONED) cfg->fc_flags |= RTF_CACHE; + cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -3812,6 +4104,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, goto cleanup; } + rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; + err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); if (err) { dst_release_immediate(&rt->dst); @@ -3992,7 +4286,10 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, unsigned int *flags, bool skip_oif) { - if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) { + if (rt->rt6i_nh_flags & RTNH_F_DEAD) + *flags |= RTNH_F_DEAD; + + if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) { *flags |= RTNH_F_LINKDOWN; if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) *flags |= RTNH_F_DEAD; @@ -4003,6 +4300,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, goto nla_put_failure; } + *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; @@ -4031,7 +4329,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) if (!rtnh) goto nla_put_failure; - rtnh->rtnh_hops = 0; + rtnh->rtnh_hops = rt->rt6i_nh_weight - 1; rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; if (rt6_nexthop_info(skb, rt, &flags, true) < 0) @@ -4321,9 +4619,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; } - if (fibmatch && rt->dst.from) { - struct rt6_info *ort = container_of(rt->dst.from, - struct rt6_info, dst); + if (fibmatch && rt->from) { + struct rt6_info *ort = rt->from; dst_hold(&ort->dst); ip6_rt_put(rt); @@ -4427,7 +4724,6 @@ static int ip6_route_dev_notify(struct notifier_block *this, #ifdef CONFIG_PROC_FS static const struct file_operations ipv6_route_proc_fops = { - .owner = THIS_MODULE, .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, @@ -4455,7 +4751,6 @@ static int rt6_stats_seq_open(struct inode *inode, struct file *file) } static const struct file_operations rt6_stats_seq_fops = { - .owner = THIS_MODULE, .open = rt6_stats_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -4600,8 +4895,6 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_null_entry) goto out_ip6_dst_entries; - net->ipv6.ip6_null_entry->dst.path = - (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_null_entry->dst, ip6_template_metrics, true); @@ -4613,8 +4906,6 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_prohibit_entry) goto out_ip6_null_entry; - net->ipv6.ip6_prohibit_entry->dst.path = - (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, ip6_template_metrics, true); @@ -4624,8 +4915,6 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_blk_hole_entry) goto out_ip6_prohibit_entry; - net->ipv6.ip6_blk_hole_entry->dst.path = - (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, ip6_template_metrics, true); @@ -4782,11 +5071,20 @@ int __init ip6_route_init(void) if (ret) goto fib6_rules_init; - ret = -ENOBUFS; - if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) || - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) || - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, - RTNL_FLAG_DOIT_UNLOCKED)) + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE, + inet6_rtm_newroute, NULL, 0); + if (ret < 0) + goto out_register_late_subsys; + + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE, + inet6_rtm_delroute, NULL, 0); + if (ret < 0) + goto out_register_late_subsys; + + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, + inet6_rtm_getroute, NULL, + RTNL_FLAG_DOIT_UNLOCKED); + if (ret < 0) goto out_register_late_subsys; ret = register_netdevice_notifier(&ip6_route_dev_notifier); @@ -4804,6 +5102,7 @@ out: return ret; out_register_late_subsys: + rtnl_unregister_all(PF_INET6); unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_init: fib6_rules_cleanup(); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index c81407770956..7f5621d09571 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -306,9 +306,7 @@ static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) struct seg6_hmac_info *hinfo; int ret; - ret = rhashtable_walk_start(iter); - if (ret && ret != -EAGAIN) - goto done; + rhashtable_walk_start(iter); for (;;) { hinfo = rhashtable_walk_next(iter); diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 825b8e01f947..ba3767ef5e93 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -501,7 +501,7 @@ static struct seg6_action_desc *__get_action_desc(int action) struct seg6_action_desc *desc; int i, count; - count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc); + count = ARRAY_SIZE(seg6_action_table); for (i = 0; i < count; i++) { desc = &seg6_action_table[i]; if (desc->action == action) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7178476b3d2f..a1ab29e2ab3b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -176,8 +176,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, /* If interface is set while binding, indices * must coincide. */ - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != usin->sin6_scope_id) + if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; @@ -1795,7 +1794,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) timer_expires = jiffies; } - state = sk_state_load(sp); + state = inet_sk_state_load(sp); if (state == TCP_LISTEN) rx_queue = sp->sk_ack_backlog; else @@ -1884,7 +1883,6 @@ out: } static const struct file_operations tcp6_afinfo_seq_fops = { - .owner = THIS_MODULE, .open = tcp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3f30fa313bf2..52e3ea0e6f50 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -89,28 +89,12 @@ static u32 udp6_ehashfn(const struct net *net, udp_ipv6_hash_secret + net_hash_mix(net)); } -static u32 udp6_portaddr_hash(const struct net *net, - const struct in6_addr *addr6, - unsigned int port) -{ - unsigned int hash, mix = net_hash_mix(net); - - if (ipv6_addr_any(addr6)) - hash = jhash_1word(0, mix); - else if (ipv6_addr_v4mapped(addr6)) - hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); - else - hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); - - return hash ^ port; -} - int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = - udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); + ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum); unsigned int hash2_partial = - udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); + ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -119,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) static void udp_v6_rehash(struct sock *sk) { - u16 new_hash = udp6_portaddr_hash(sock_net(sk), + u16 new_hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); @@ -184,7 +168,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; - int score, badness, matches = 0, reuseport = 0; + int score, badness; u32 hash = 0; result = NULL; @@ -193,8 +177,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif, exact_dif); if (score > badness) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); @@ -202,15 +185,9 @@ static struct sock *udp6_lib_lookup2(struct net *net, sizeof(struct udphdr)); if (result) return result; - matches = 1; } result = sk; badness = score; - } else if (score == badness && reuseport) { - matches++; - if (reciprocal_scale(hash, matches) == 0) - result = sk; - hash = next_pseudo_random32(hash); } } return result; @@ -228,11 +205,11 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; bool exact_dif = udp6_lib_exact_dif_match(net, skb); - int score, badness, matches = 0, reuseport = 0; + int score, badness; u32 hash = 0; if (hslot->count > 10) { - hash2 = udp6_portaddr_hash(net, daddr, hnum); + hash2 = ipv6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) @@ -243,7 +220,7 @@ struct sock *__udp6_lib_lookup(struct net *net, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; - hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; /* avoid searching the same slot again. */ if (unlikely(slot2 == old_slot2)) @@ -267,23 +244,16 @@ begin: score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif, exact_dif); if (score > badness) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); if (result) return result; - matches = 1; } result = sk; badness = score; - } else if (score == badness && reuseport) { - matches++; - if (reciprocal_scale(hash, matches) == 0) - result = sk; - hash = next_pseudo_random32(hash); } } return result; @@ -719,9 +689,9 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct sk_buff *nskb; if (use_hash2) { - hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & + hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) & udptable->mask; - hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask; + hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); @@ -909,7 +879,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, int dif, int sdif) { unsigned short hnum = ntohs(loc_port); - unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum); + unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); @@ -1509,7 +1479,6 @@ int udp6_seq_show(struct seq_file *seq, void *v) } static const struct file_operations udp6_afinfo_seq_fops = { - .owner = THIS_MODULE, .open = udp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 2784cc363f2b..14ae32bb1f3d 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -94,7 +94,6 @@ void udplitev6_exit(void) #ifdef CONFIG_PROC_FS static const struct file_operations udplite6_afinfo_seq_fops = { - .owner = THIS_MODULE, .open = udp_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index dc93002ff9d1..bb935a3b7fea 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -59,7 +59,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); - top_iph->hop_limit = ip6_dst_hoplimit(dst->child); + top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst)); top_iph->saddr = *(struct in6_addr *)&x->props.saddr; top_iph->daddr = *(struct in6_addr *)&x->id.daddr; return 0; @@ -106,17 +106,14 @@ static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x, { __skb_push(skb, skb->mac_len); return skb_mac_gso_segment(skb, features); - } static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) { struct xfrm_offload *xo = xfrm_offload(skb); - if (xo->flags & XFRM_GSO_SEGMENT) { - skb->network_header = skb->network_header - x->props.header_len; + if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - } skb_reset_mac_len(skb); pskb_pull(skb, skb->mac_len + x->props.header_len); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 885ade234a49..09fb44ee3b45 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -265,7 +265,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, in6_dev_put(xdst->u.rt6.rt6i_idev); xdst->u.rt6.rt6i_idev = loopback_idev; in6_dev_hold(loopback_idev); - xdst = (struct xfrm_dst *)xdst->u.dst.child; + xdst = (struct xfrm_dst *)xfrm_dst_child(&xdst->u.dst); } while (xdst->u.dst.xfrm); __in6_dev_put(loopback_idev); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 38a3d51d9ead..b9232e4e2ed4 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -260,7 +260,6 @@ static int ipx_seq_socket_open(struct inode *inode, struct file *file) } static const struct file_operations ipx_seq_interface_fops = { - .owner = THIS_MODULE, .open = ipx_seq_interface_open, .read = seq_read, .llseek = seq_lseek, @@ -268,7 +267,6 @@ static const struct file_operations ipx_seq_interface_fops = { }; static const struct file_operations ipx_seq_route_fops = { - .owner = THIS_MODULE, .open = ipx_seq_route_open, .read = seq_read, .llseek = seq_lseek, @@ -276,7 +274,6 @@ static const struct file_operations ipx_seq_route_fops = { }; static const struct file_operations ipx_seq_socket_fops = { - .owner = THIS_MODULE, .open = ipx_seq_socket_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index bd5723315069..9d5649e4e8b7 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -247,7 +247,6 @@ static int kcm_seq_show(struct seq_file *seq, void *v) } static const struct file_operations kcm_seq_fops = { - .owner = THIS_MODULE, .open = kcm_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -397,7 +396,6 @@ static int kcm_stats_seq_open(struct inode *inode, struct file *file) } static const struct file_operations kcm_stats_seq_fops = { - .owner = THIS_MODULE, .open = kcm_stats_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 115918ad8eca..194a7483bb93 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -662,10 +662,9 @@ discard: * |x|S|x|x|x|x|x|x| Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * - * Cookie value, sublayer format and offset (pad) are negotiated with - * the peer when the session is set up. Unlike L2TPv2, we do not need - * to parse the packet header to determine if optional fields are - * present. + * Cookie value and sublayer format are negotiated with the peer when + * the session is set up. Unlike L2TPv2, we do not need to parse the + * packet header to determine if optional fields are present. * * Caller must already have parsed the frame and determined that it is * a data (not control) frame before coming here. Fields up to the @@ -731,11 +730,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: recv data ns=%u, session nr=%u\n", session->name, ns, session->nr); } + ptr += 4; } - /* Advance past L2-specific header, if present */ - ptr += session->l2specific_len; - if (L2TP_SKB_CB(skb)->has_seq) { /* Received a packet with sequence numbers. If we're the LNS, * check if we sre sending sequence numbers and if not, @@ -780,10 +777,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, } } - /* Session data offset is handled differently for L2TPv2 and - * L2TPv3. For L2TPv2, there is an optional 16-bit value in - * the header. For L2TPv3, the offset is negotiated using AVPs - * in the session setup control protocol. + /* Session data offset is defined only for L2TPv2 and is + * indicated by an optional 16-bit value in the header. */ if (tunnel->version == L2TP_HDR_VER_2) { /* If offset bit set, skip it. */ @@ -791,8 +786,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, offset = ntohs(*(__be16 *)ptr); ptr += 2 + offset; } - } else - ptr += session->offset; + } offset = ptr - optr; if (!pskb_may_pull(skb, offset)) @@ -1052,24 +1046,21 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) memcpy(bufp, &session->cookie[0], session->cookie_len); bufp += session->cookie_len; } - if (session->l2specific_len) { - if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { - u32 l2h = 0; - if (session->send_seq) { - l2h = 0x40000000 | session->ns; - session->ns++; - session->ns &= 0xffffff; - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: updated ns to %u\n", - session->name, session->ns); - } + if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { + u32 l2h = 0; - *((__be32 *) bufp) = htonl(l2h); + if (session->send_seq) { + l2h = 0x40000000 | session->ns; + session->ns++; + session->ns &= 0xffffff; + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: updated ns to %u\n", + session->name, session->ns); } - bufp += session->l2specific_len; + + *((__be32 *)bufp) = htonl(l2h); + bufp += 4; } - if (session->offset) - bufp += session->offset; return bufp - optr; } @@ -1725,7 +1716,7 @@ int l2tp_session_delete(struct l2tp_session *session) EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or - * l2specific_len parameters are set. + * l2specific_type parameters are set. */ void l2tp_session_set_header_len(struct l2tp_session *session, int version) { @@ -1734,7 +1725,8 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) if (session->send_seq) session->hdr_len += 4; } else { - session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset; + session->hdr_len = 4 + session->cookie_len; + session->hdr_len += l2tp_get_l2specific_len(session); if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } @@ -1784,9 +1776,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session->recv_seq = cfg->recv_seq; session->lns_mode = cfg->lns_mode; session->reorder_timeout = cfg->reorder_timeout; - session->offset = cfg->offset; session->l2specific_type = cfg->l2specific_type; - session->l2specific_len = cfg->l2specific_len; session->cookie_len = cfg->cookie_len; memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len); session->peer_cookie_len = cfg->peer_cookie_len; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9534e16965cc..9bbee90e9963 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -59,8 +59,6 @@ struct l2tp_session_cfg { int debug; /* bitmask of debug message * categories */ u16 vlan_id; /* VLAN pseudowire only */ - u16 offset; /* offset to payload */ - u16 l2specific_len; /* Layer 2 specific length */ u16 l2specific_type; /* Layer 2 specific type */ u8 cookie[8]; /* optional cookie */ int cookie_len; /* 0, 4 or 8 bytes */ @@ -86,9 +84,6 @@ struct l2tp_session { int cookie_len; u8 peer_cookie[8]; int peer_cookie_len; - u16 offset; /* offset from end of L2TP header - to beginning of data */ - u16 l2specific_len; u16 l2specific_type; u16 hdr_len; u32 nr; /* session NR state (receive) */ @@ -305,6 +300,17 @@ static inline void l2tp_session_dec_refcount(struct l2tp_session *session) l2tp_session_free(session); } +static inline int l2tp_get_l2specific_len(struct l2tp_session *session) +{ + switch (session->l2specific_type) { + case L2TP_L2SPECTYPE_DEFAULT: + return 4; + case L2TP_L2SPECTYPE_NONE: + default: + return 0; + } +} + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index eb69411bcb47..72e713da4733 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -180,8 +180,8 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " offset %hu l2specific %hu/%hu\n", - session->offset, session->l2specific_type, session->l2specific_len); + seq_printf(m, " offset 0 l2specific %hu/%hu\n", + session->l2specific_type, l2tp_get_l2specific_len(session)); if (session->cookie_len) { seq_printf(m, " cookie %02x%02x%02x%02x", session->cookie[0], session->cookie[1], diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index a1f24fb2be98..e7ea9c4b89ff 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -547,19 +547,19 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf } if (tunnel->version > 2) { - if (info->attrs[L2TP_ATTR_OFFSET]) - cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]); - if (info->attrs[L2TP_ATTR_DATA_SEQ]) cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]); - cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT; - if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) + if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) { cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]); - - cfg.l2specific_len = 4; - if (info->attrs[L2TP_ATTR_L2SPEC_LEN]) - cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]); + if (cfg.l2specific_type != L2TP_L2SPECTYPE_DEFAULT && + cfg.l2specific_type != L2TP_L2SPECTYPE_NONE) { + ret = -EINVAL; + goto out_tunnel; + } + } else { + cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT; + } if (info->attrs[L2TP_ATTR_COOKIE]) { u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]); @@ -620,27 +620,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf goto out_tunnel; } - /* Check that pseudowire-specific params are present */ - switch (cfg.pw_type) { - case L2TP_PWTYPE_NONE: - break; - case L2TP_PWTYPE_ETH_VLAN: - if (!info->attrs[L2TP_ATTR_VLAN_ID]) { - ret = -EINVAL; - goto out_tunnel; - } - break; - case L2TP_PWTYPE_ETH: - break; - case L2TP_PWTYPE_PPP: - case L2TP_PWTYPE_PPP_AC: - break; - case L2TP_PWTYPE_IP: - default: - ret = -EPROTONOSUPPORT; - break; - } - ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel, session_id, peer_session_id, diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index b412fc3351dc..59f246d7b290 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1734,7 +1734,6 @@ static int pppol2tp_proc_open(struct inode *inode, struct file *file) } static const struct file_operations pppol2tp_proc_fops = { - .owner = THIS_MODULE, .open = pppol2tp_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 29c509c54bb2..66821e8a2b7a 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -225,7 +225,6 @@ static int llc_seq_core_open(struct inode *inode, struct file *file) } static const struct file_operations llc_seq_socket_fops = { - .owner = THIS_MODULE, .open = llc_seq_socket_open, .read = seq_read, .llseek = seq_lseek, @@ -233,7 +232,6 @@ static const struct file_operations llc_seq_socket_fops = { }; static const struct file_operations llc_seq_core_fops = { - .owner = THIS_MODULE, .open = llc_seq_core_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index d444752dbf40..a8b1616cec41 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -153,27 +153,16 @@ EXPORT_SYMBOL(ieee80211_stop_rx_ba_session); */ static void sta_rx_agg_session_timer_expired(struct timer_list *t) { - struct tid_ampdu_rx *tid_rx_timer = - from_timer(tid_rx_timer, t, session_timer); - struct sta_info *sta = tid_rx_timer->sta; - u8 tid = tid_rx_timer->tid; - struct tid_ampdu_rx *tid_rx; + struct tid_ampdu_rx *tid_rx = from_timer(tid_rx, t, session_timer); + struct sta_info *sta = tid_rx->sta; + u8 tid = tid_rx->tid; unsigned long timeout; - rcu_read_lock(); - tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); - if (!tid_rx) { - rcu_read_unlock(); - return; - } - timeout = tid_rx->last_rx + TU_TO_JIFFIES(tid_rx->timeout); if (time_is_after_jiffies(timeout)) { mod_timer(&tid_rx->session_timer, timeout); - rcu_read_unlock(); return; } - rcu_read_unlock(); ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n", sta->sta.addr, tid); @@ -415,10 +404,11 @@ end: timeout); } -void __ieee80211_start_rx_ba_session(struct sta_info *sta, - u8 dialog_token, u16 timeout, - u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq) +static void __ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, + u16 tid, u16 buf_size, bool tx, + bool auto_seq) { mutex_lock(&sta->ampdu_mlme.mtx); ___ieee80211_start_rx_ba_session(sta, dialog_token, timeout, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5f8ab5be369f..595c662a61e8 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -392,7 +392,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, * telling the driver. New packets will not go through since * the aggregation session is no longer OPERATIONAL. */ - synchronize_net(); + if (!local->in_reconfig) + synchronize_net(); tid_tx->stop_initiator = reason == AGG_STOP_PEER_REQUEST ? WLAN_BACK_RECIPIENT : @@ -429,18 +430,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ static void sta_addba_resp_timer_expired(struct timer_list *t) { - struct tid_ampdu_tx *tid_tx_timer = - from_timer(tid_tx_timer, t, addba_resp_timer); - struct sta_info *sta = tid_tx_timer->sta; - u8 tid = tid_tx_timer->tid; - struct tid_ampdu_tx *tid_tx; + struct tid_ampdu_tx *tid_tx = from_timer(tid_tx, t, addba_resp_timer); + struct sta_info *sta = tid_tx->sta; + u8 tid = tid_tx->tid; /* check if the TID waits for addBA response */ - rcu_read_lock(); - tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); - if (!tid_tx || - test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { - rcu_read_unlock(); + if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { ht_dbg(sta->sdata, "timer expired on %pM tid %d not expecting addBA response\n", sta->sta.addr, tid); @@ -451,7 +446,6 @@ static void sta_addba_resp_timer_expired(struct timer_list *t) sta->sta.addr, tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); - rcu_read_unlock(); } void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) @@ -529,29 +523,21 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ static void sta_tx_agg_session_timer_expired(struct timer_list *t) { - struct tid_ampdu_tx *tid_tx_timer = - from_timer(tid_tx_timer, t, session_timer); - struct sta_info *sta = tid_tx_timer->sta; - u8 tid = tid_tx_timer->tid; - struct tid_ampdu_tx *tid_tx; + struct tid_ampdu_tx *tid_tx = from_timer(tid_tx, t, session_timer); + struct sta_info *sta = tid_tx->sta; + u8 tid = tid_tx->tid; unsigned long timeout; - rcu_read_lock(); - tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); - if (!tid_tx || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { - rcu_read_unlock(); + if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { return; } timeout = tid_tx->last_tx + TU_TO_JIFFIES(tid_tx->timeout); if (time_is_after_jiffies(timeout)) { mod_timer(&tid_tx->session_timer, timeout); - rcu_read_unlock(); return; } - rcu_read_unlock(); - ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n", sta->sta.addr, tid); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb15d3b97cb2..46028e12e216 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -573,10 +573,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_BIP_CMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_cmac)); + /* fall through */ case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_gmac)); + /* fall through */ case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != @@ -2205,6 +2207,7 @@ static int ieee80211_scan(struct wiphy *wiphy, * for now fall through to allow scanning only when * beaconing hasn't been configured yet */ + /* fall through */ case NL80211_IFTYPE_AP: /* * If the scan has been forced (and the driver supports @@ -2373,10 +2376,17 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata; enum nl80211_tx_power_setting txp_type = type; bool update_txp_type = false; + bool has_monitor = false; if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + sdata = rtnl_dereference(local->monitor_sdata); + if (!sdata) + return -EOPNOTSUPP; + } + switch (type) { case NL80211_TX_POWER_AUTOMATIC: sdata->user_power_level = IEEE80211_UNSET_POWER_LEVEL; @@ -2415,15 +2425,34 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + has_monitor = true; + continue; + } sdata->user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; } - list_for_each_entry(sdata, &local->interfaces, list) + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + continue; ieee80211_recalc_txpower(sdata, update_txp_type); + } mutex_unlock(&local->iflist_mtx); + if (has_monitor) { + sdata = rtnl_dereference(local->monitor_sdata); + if (sdata) { + sdata->user_power_level = local->user_power_level; + if (txp_type != sdata->vif.bss_conf.txpower_type) + update_txp_type = true; + sdata->vif.bss_conf.txpower_type = txp_type; + + ieee80211_recalc_txpower(sdata, update_txp_type); + } + } + return 0; } diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 5fae001f286c..1f466d12a6bc 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -211,6 +211,7 @@ static const char *hw_flag_names[] = { FLAG(TX_FRAG_LIST), FLAG(REPORTS_LOW_ACK), FLAG(SUPPORTS_TX_FRAG), + FLAG(SUPPORTS_TDLS_BUFFER_STA), #undef FLAG }; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index b15412c21ac9..444ea8d127fe 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -420,7 +420,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, default: p += scnprintf(p, sizeof(buf) + buf - p, "\t\tMAX-MPDU-UNKNOWN\n"); - }; + } switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case 0: p += scnprintf(p, sizeof(buf) + buf - p, @@ -438,7 +438,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "\t\tUNKNOWN-MHZ: 0x%x\n", (vhtc->cap >> 2) & 0x3); - }; + } PFLAG(RXLDPC, "RXLDPC"); PFLAG(SHORT_GI_80, "SHORT-GI-80"); PFLAG(SHORT_GI_160, "SHORT-GI-160"); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c7f93fd9ca7a..4d82fe7d627c 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -165,7 +165,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || sdata->vif.type == NL80211_IFTYPE_NAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !sdata->vif.mu_mimo_owner))) + !sdata->vif.mu_mimo_owner && + !(changed & BSS_CHANGED_TXPOWER)))) return; if (!check_sdata_in_driver(sdata)) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 1621b6ab17ba..d7523530d3f8 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -492,6 +492,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); + /* fall through */ case IEEE80211_SMPS_OFF: action_frame->u.action.u.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DISABLED; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 885d00b41911..26900025de2f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1757,10 +1757,6 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); -void __ieee80211_start_rx_ba_session(struct sta_info *sta, - u8 dialog_token, u16 timeout, - u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq); void ___ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 13b16f90e1cf..5fe01f82df12 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1474,7 +1474,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: - BUG(); + WARN_ON(1); break; } @@ -1633,7 +1633,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, goto out_unlock; } } - /* otherwise fall through */ + /* fall through */ default: /* assign a new address if possible -- try n_addresses first */ for (i = 0; i < local->hw.wiphy->n_addresses; i++) { diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 938049395f90..aee05ec3f7ea 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -178,13 +178,17 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!ret) { key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) || (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) decrease_tailroom_need_count(sdata, 1); WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)); + WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) && + (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)); + return 0; } @@ -237,7 +241,8 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta = key->sta; sdata = key->sdata; - if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) || (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(sdata); @@ -1104,7 +1109,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) || (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(key->sdata); } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e054a2fd8d38..0785d04a80bc 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -263,6 +263,9 @@ static void ieee80211_restart_work(struct work_struct *work) flush_delayed_work(&local->roc_work); flush_work(&local->hw_roc_done); + /* wait for all packet processing to be done */ + synchronize_net(); + ieee80211_reconfig(local); rtnl_unlock(); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5e27364e10ac..73ac607beb5d 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -989,8 +989,10 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.bss_conf.chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: sta_flags |= IEEE80211_STA_DISABLE_HT; + /* fall through */ case NL80211_CHAN_WIDTH_20: sta_flags |= IEEE80211_STA_DISABLE_40MHZ; + /* fall through */ case NL80211_CHAN_WIDTH_40: sta_flags |= IEEE80211_STA_DISABLE_VHT; break; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 4394463a0c2e..35ad3983ae4b 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1250,6 +1250,7 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) break; case IEEE80211_PROACTIVE_PREQ_WITH_PREP: flags |= IEEE80211_PREQ_PROACTIVE_PREP_FLAG; + /* fall through */ case IEEE80211_PROACTIVE_PREQ_NO_PREP: interval = ifmsh->mshcfg.dot11MeshHWMPactivePathToRootTimeout; target_flags |= IEEE80211_PREQ_TO_FLAG | diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 86c8dfef56a4..a5125624a76d 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -257,9 +257,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) if (ret) return NULL; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto err; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -269,7 +267,6 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) if (i++ == idx) break; } -err: rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); @@ -513,9 +510,7 @@ void mesh_plink_broken(struct sta_info *sta) if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -535,7 +530,6 @@ void mesh_plink_broken(struct sta_info *sta) WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast); } } -out: rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } @@ -584,9 +578,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -597,7 +589,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) if (rcu_access_pointer(mpath->next_hop) == sta) __mesh_path_del(tbl, mpath); } -out: + rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } @@ -614,9 +606,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -627,7 +617,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, if (ether_addr_equal(mpath->mpp, proxy)) __mesh_path_del(tbl, mpath); } -out: + rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } @@ -642,9 +632,7 @@ static void table_flush_by_iface(struct mesh_table *tbl) if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -653,7 +641,7 @@ static void table_flush_by_iface(struct mesh_table *tbl) break; __mesh_path_del(tbl, mpath); } -out: + rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } @@ -873,9 +861,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -887,7 +873,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) __mesh_path_del(tbl, mpath); } -out: + rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index e2d00cce3c17..0f6c9ca59062 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -672,7 +672,7 @@ void mesh_plink_timer(struct timer_list *t) break; } reason = WLAN_REASON_MESH_MAX_RETRIES; - /* fall through on else */ + /* fall through */ case NL80211_PLINK_CNF_RCVD: /* confirm timer */ if (!reason) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c244691deab9..39b660b9a908 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -473,6 +473,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); + /* fall through */ case IEEE80211_SMPS_OFF: cap |= WLAN_HT_CAP_SM_PS_DISABLED << IEEE80211_HT_CAP_SM_PS_SHIFT; @@ -2861,10 +2862,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, aid = le16_to_cpu(mgmt->u.assoc_resp.aid); capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); - if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) - sdata_info(sdata, "invalid AID value 0x%x; bits 15:14 not set\n", - aid); - aid &= ~(BIT(15) | BIT(14)); + /* + * The 5 MSB of the AID field are reserved + * (802.11-2016 9.4.1.8 AID field) + */ + aid &= 0x7ff; ifmgd->broken_ap = false; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index faf4f6055000..f1d40b6645ff 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -801,14 +801,14 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, case NL80211_IFTYPE_ADHOC: if (!sdata->vif.bss_conf.ibss_joined) need_offchan = true; - /* fall through */ #ifdef CONFIG_MAC80211_MESH + /* fall through */ case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif) && !sdata->u.mesh.mesh_id_len) need_offchan = true; - /* fall through */ #endif + /* fall through */ case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4daafb07602f..fd580614085b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1607,23 +1607,16 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) /* * Change STA power saving mode only at the end of a frame - * exchange sequence. + * exchange sequence, and only for a data or management + * frame as specified in IEEE 802.11-2016 11.2.3.2 */ if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) && !ieee80211_has_morefrags(hdr->frame_control) && - !ieee80211_is_back_req(hdr->frame_control) && + (ieee80211_is_mgmt(hdr->frame_control) || + ieee80211_is_data(hdr->frame_control)) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || - rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && - /* - * PM bit is only checked in frames where it isn't reserved, - * in AP mode it's reserved in non-bufferable management frames - * (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field) - * BAR frames should be ignored as specified in - * IEEE 802.11-2012 10.2.1.2. - */ - (!ieee80211_is_mgmt(hdr->frame_control) || - ieee80211_is_bufferable_mmpdu(hdr->frame_control))) { + rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { if (test_sta_flag(sta, WLAN_STA_PS_STA)) { if (!ieee80211_has_pm(hdr->frame_control)) sta_ps_end(sta); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 91093d4a2f84..5cd5e6e5834e 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -47,6 +47,8 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata, NL80211_FEATURE_TDLS_CHANNEL_SWITCH; bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && !ifmgd->tdls_wider_bw_prohibited; + bool buffer_sta = ieee80211_hw_check(&local->hw, + SUPPORTS_TDLS_BUFFER_STA); struct ieee80211_supported_band *sband = ieee80211_get_sband(sdata); bool vht = sband && sband->vht_cap.vht_supported; u8 *pos = skb_put(skb, 10); @@ -56,7 +58,8 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata, *pos++ = 0x0; *pos++ = 0x0; *pos++ = 0x0; - *pos++ = chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0; + *pos++ = (chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0) | + (buffer_sta ? WLAN_EXT_CAPA4_TDLS_BUFFER_STA : 0); *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; *pos++ = 0; *pos++ = 0; @@ -236,6 +239,7 @@ static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac) switch (ac) { default: WARN_ON_ONCE(1); + /* fall through */ case 0: return IEEE80211_AC_BE; case 1: diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3160954fc406..25904af38839 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2922,7 +2922,9 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV; iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE; - mmic = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC; + mmic = build.key->conf.flags & + (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE); /* don't handle software crypto */ if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d57e5f6bd8b6..1f82191ce601 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2110,15 +2110,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0); wake_up: - if (local->in_reconfig) { - local->in_reconfig = false; - barrier(); - - /* Restart deferred ROCs */ - mutex_lock(&local->mtx); - ieee80211_start_next_roc(local); - mutex_unlock(&local->mtx); - } if (local->monitors == local->open_count && local->monitors > 0) ieee80211_add_virtual_monitor(local); @@ -2146,6 +2137,16 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } + if (local->in_reconfig) { + local->in_reconfig = false; + barrier(); + + /* Restart deferred ROCs */ + mutex_lock(&local->mtx); + ieee80211_start_next_roc(local); + mutex_unlock(&local->mtx); + } + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND, false); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 3e3d3014e9ab..5f7c96368b11 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -165,6 +165,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, qos = sta->sta.wme; break; } + /* fall through */ case NL80211_IFTYPE_AP: ra = skb->data; break; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index b58722d9de37..785056cb76f6 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -1,7 +1,7 @@ /* * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2008, Jouni Malinen <j@w1.fi> - * Copyright (C) 2016 Intel Deutschland GmbH + * Copyright (C) 2016-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -59,8 +59,9 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) if (info->control.hw_key && (info->flags & IEEE80211_TX_CTL_DONTFRAG || ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) && - !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { - /* hwaccel - with no need for SW-generated MMIC */ + !(tx->key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE))) { + /* hwaccel - with no need for SW-generated MMIC or MIC space */ return TX_CONTINUE; } @@ -75,8 +76,15 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) skb_tailroom(skb), tail)) return TX_DROP; - key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; mic = skb_put(skb, MICHAEL_MIC_LEN); + + if (tx->key->conf.flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) { + /* Zeroed MIC can help with debug */ + memset(mic, 0, MICHAEL_MIC_LEN); + return TX_CONTINUE; + } + + key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; michael_mic(key, hdr, data, data_len, mic); if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) mic[0]++; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 8ca9915befc8..5dce8336d33f 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2510,12 +2510,15 @@ static int __init mpls_init(void) rtnl_af_register(&mpls_af_ops); - rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0); - rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0); - rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes, - 0); - rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, - mpls_netconf_dump_devconf, 0); + rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_NEWROUTE, + mpls_rtm_newroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_DELROUTE, + mpls_rtm_delroute, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETROUTE, + mpls_getroute, mpls_dump_routes, 0); + rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETNETCONF, + mpls_netconf_get_devconf, + mpls_netconf_dump_devconf, 0); err = ipgre_tunnel_encap_add_mpls_ops(); if (err) pr_err("Can't add mpls over gre tunnel ops\n"); diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index 67e708e98ccf..e7b05de1e6d1 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -143,43 +143,14 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp, if (!nc) return -ENODEV; - /* If the channel is active one, we need reconfigure it */ spin_lock_irqsave(&nc->lock, flags); ncm = &nc->modes[NCSI_MODE_LINK]; hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; ncm->data[3] = ntohl(hncdsc->status); - netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n", - nc->id, ncm->data[3] & 0x3 ? "up" : "down"); - if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE) { - spin_unlock_irqrestore(&nc->lock, flags); - return 0; - } - - spin_unlock_irqrestore(&nc->lock, flags); - if (!(ndp->flags & NCSI_DEV_HWA) && !(ncm->data[3] & 0x1)) - ndp->flags |= NCSI_DEV_RESHUFFLE; - - /* If this channel is the active one and the link doesn't - * work, we have to choose another channel to be active one. - * The logic here is exactly similar to what we do when link - * is down on the active channel. - * - * On the other hand, we need configure it when host driver - * state on the active channel becomes ready. - */ - ncsi_stop_channel_monitor(nc); - - spin_lock_irqsave(&nc->lock, flags); - nc->state = (ncm->data[3] & 0x1) ? NCSI_CHANNEL_INACTIVE : - NCSI_CHANNEL_ACTIVE; spin_unlock_irqrestore(&nc->lock, flags); - - spin_lock_irqsave(&ndp->lock, flags); - list_add_tail_rcu(&nc->link, &ndp->channel_queue); - spin_unlock_irqrestore(&ndp->lock, flags); - - ncsi_process_next_channel(ndp); + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: host driver %srunning on channel %u\n", + ncm->data[3] & 0x1 ? "" : "not ", nc->id); return 0; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e4a13cc8a2e7..9019fa98003d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -12,6 +12,12 @@ config NETFILTER_INGRESS config NETFILTER_NETLINK tristate +config NETFILTER_FAMILY_BRIDGE + bool + +config NETFILTER_FAMILY_ARP + bool + config NETFILTER_NETLINK_ACCT tristate "Netfilter NFACCT over NFNETLINK interface" depends on NETFILTER_ADVANCED @@ -62,6 +68,8 @@ config NF_LOG_NETDEV select NF_LOG_COMMON if NF_CONNTRACK +config NETFILTER_CONNCOUNT + tristate config NF_CONNTRACK_MARK bool 'Connection mark tracking support' @@ -497,6 +505,13 @@ config NFT_CT This option adds the "ct" expression that you can use to match connection tracking information such as the flow state. +config NFT_FLOW_OFFLOAD + depends on NF_CONNTRACK && NF_FLOW_TABLE + tristate "Netfilter nf_tables hardware flow offload module" + help + This option adds the "flow_offload" expression that you can use to + choose what flows are placed into the hardware. + config NFT_SET_RBTREE tristate "Netfilter nf_tables rbtree set module" help @@ -649,6 +664,23 @@ endif # NF_TABLES_NETDEV endif # NF_TABLES +config NF_FLOW_TABLE_INET + tristate "Netfilter flow table mixed IPv4/IPv6 module" + depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6 + select NF_FLOW_TABLE + help + This option adds the flow table mixed IPv4/IPv6 support. + + To compile it as a module, choose M here. + +config NF_FLOW_TABLE + tristate "Netfilter flow table module" + depends on NF_CONNTRACK && NF_TABLES + help + This option adds the flow table core infrastructure. + + To compile it as a module, choose M here. + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n @@ -1120,6 +1152,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED + select NETFILTER_CONNCOUNT ---help--- This match allows you to match against the number of parallel connections to a server per client IP address (or address block). diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index f78ed2470831..5d9b8b959e58 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o +netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o @@ -67,6 +67,8 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +obj-$(CONFIG_NETFILTER_CONNCOUNT) += nf_conncount.o + # generic packet duplication from netdev family obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o @@ -84,6 +86,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_RT) += nft_rt.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o +obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o obj-$(CONFIG_NFT_OBJREF) += nft_objref.o @@ -107,6 +110,10 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o +# flow table infrastructure +obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o +obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 52cd2901a097..0f6b8172fb9a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -4,8 +4,7 @@ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any * way. * - * Rusty Russell (C)2000 -- This code is GPL. - * Patrick McHardy (c) 2006-2012 + * This code is GPL. */ #include <linux/kernel.h> #include <linux/netfilter.h> @@ -28,34 +27,12 @@ #include "nf_internals.h" -static DEFINE_MUTEX(afinfo_mutex); - -const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; -EXPORT_SYMBOL(nf_afinfo); const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; EXPORT_SYMBOL_GPL(nf_ipv6_ops); DEFINE_PER_CPU(bool, nf_skb_duplicated); EXPORT_SYMBOL_GPL(nf_skb_duplicated); -int nf_register_afinfo(const struct nf_afinfo *afinfo) -{ - mutex_lock(&afinfo_mutex); - RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo); - mutex_unlock(&afinfo_mutex); - return 0; -} -EXPORT_SYMBOL_GPL(nf_register_afinfo); - -void nf_unregister_afinfo(const struct nf_afinfo *afinfo) -{ - mutex_lock(&afinfo_mutex); - RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL); - mutex_unlock(&afinfo_mutex); - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(nf_unregister_afinfo); - #ifdef HAVE_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); @@ -74,7 +51,8 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num) struct nf_hook_entries *e; size_t alloc = sizeof(*e) + sizeof(struct nf_hook_entry) * num + - sizeof(struct nf_hook_ops *) * num; + sizeof(struct nf_hook_ops *) * num + + sizeof(struct nf_hook_entries_rcu_head); if (num == 0) return NULL; @@ -85,6 +63,30 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num) return e; } +static void __nf_hook_entries_free(struct rcu_head *h) +{ + struct nf_hook_entries_rcu_head *head; + + head = container_of(h, struct nf_hook_entries_rcu_head, head); + kvfree(head->allocation); +} + +static void nf_hook_entries_free(struct nf_hook_entries *e) +{ + struct nf_hook_entries_rcu_head *head; + struct nf_hook_ops **ops; + unsigned int num; + + if (!e) + return; + + num = e->num_hook_entries; + ops = nf_hook_entries_get_hook_ops(e); + head = (void *)&ops[num]; + head->allocation = e; + call_rcu(&head->head, __nf_hook_entries_free); +} + static unsigned int accept_all(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -135,6 +137,12 @@ nf_hook_entries_grow(const struct nf_hook_entries *old, ++i; continue; } + + if (reg->nat_hook && orig_ops[i]->nat_hook) { + kvfree(new); + return ERR_PTR(-EBUSY); + } + if (inserted || reg->priority > orig_ops[i]->priority) { new_ops[nhooks] = (void *)orig_ops[i]; new->hooks[nhooks] = old->hooks[i]; @@ -237,27 +245,61 @@ out_assign: return old; } -static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) +static struct nf_hook_entries __rcu ** +nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, + struct net_device *dev) { - if (reg->pf != NFPROTO_NETDEV) - return net->nf.hooks[reg->pf]+reg->hooknum; + switch (pf) { + case NFPROTO_NETDEV: + break; +#ifdef CONFIG_NETFILTER_FAMILY_ARP + case NFPROTO_ARP: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum)) + return NULL; + return net->nf.hooks_arp + hooknum; +#endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + case NFPROTO_BRIDGE: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum)) + return NULL; + return net->nf.hooks_bridge + hooknum; +#endif + case NFPROTO_IPV4: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum)) + return NULL; + return net->nf.hooks_ipv4 + hooknum; + case NFPROTO_IPV6: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum)) + return NULL; + return net->nf.hooks_ipv6 + hooknum; +#if IS_ENABLED(CONFIG_DECNET) + case NFPROTO_DECNET: + if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum)) + return NULL; + return net->nf.hooks_decnet + hooknum; +#endif + default: + WARN_ON_ONCE(1); + return NULL; + } #ifdef CONFIG_NETFILTER_INGRESS - if (reg->hooknum == NF_NETDEV_INGRESS) { - if (reg->dev && dev_net(reg->dev) == net) - return ®->dev->nf_hooks_ingress; + if (hooknum == NF_NETDEV_INGRESS) { + if (dev && dev_net(dev) == net) + return &dev->nf_hooks_ingress; } #endif WARN_ON_ONCE(1); return NULL; } -int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) +static int __nf_register_net_hook(struct net *net, int pf, + const struct nf_hook_ops *reg) { struct nf_hook_entries *p, *new_hooks; struct nf_hook_entries __rcu **pp; - if (reg->pf == NFPROTO_NETDEV) { + if (pf == NFPROTO_NETDEV) { #ifndef CONFIG_NETFILTER_INGRESS if (reg->hooknum == NF_NETDEV_INGRESS) return -EOPNOTSUPP; @@ -267,7 +309,7 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) return -EINVAL; } - pp = nf_hook_entry_head(net, reg); + pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev); if (!pp) return -EINVAL; @@ -285,21 +327,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS - if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) + if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_inc_ingress_queue(); #endif #ifdef HAVE_JUMP_LABEL - static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); + static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]); #endif - synchronize_net(); BUG_ON(p == new_hooks); - kvfree(p); + nf_hook_entries_free(p); return 0; } -EXPORT_SYMBOL(nf_register_net_hook); /* - * __nf_unregister_net_hook - remove a hook from blob + * nf_remove_net_hook - remove a hook from blob * * @oldp: current address of hook blob * @unreg: hook to unregister @@ -307,8 +347,8 @@ EXPORT_SYMBOL(nf_register_net_hook); * This cannot fail, hook unregistration must always succeed. * Therefore replace the to-be-removed hook with a dummy hook. */ -static void __nf_unregister_net_hook(struct nf_hook_entries *old, - const struct nf_hook_ops *unreg) +static void nf_remove_net_hook(struct nf_hook_entries *old, + const struct nf_hook_ops *unreg, int pf) { struct nf_hook_ops **orig_ops; bool found = false; @@ -326,24 +366,24 @@ static void __nf_unregister_net_hook(struct nf_hook_entries *old, if (found) { #ifdef CONFIG_NETFILTER_INGRESS - if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS) + if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS) net_dec_ingress_queue(); #endif #ifdef HAVE_JUMP_LABEL - static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]); + static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]); #endif } else { - WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum); + WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum); } } -void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) +static void __nf_unregister_net_hook(struct net *net, int pf, + const struct nf_hook_ops *reg) { struct nf_hook_entries __rcu **pp; struct nf_hook_entries *p; - unsigned int nfq; - pp = nf_hook_entry_head(net, reg); + pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev); if (!pp) return; @@ -355,23 +395,52 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) return; } - __nf_unregister_net_hook(p, reg); + nf_remove_net_hook(p, reg, pf); p = __nf_hook_entries_try_shrink(pp); mutex_unlock(&nf_hook_mutex); if (!p) return; - synchronize_net(); + nf_queue_nf_hook_drop(net); + nf_hook_entries_free(p); +} - /* other cpu might still process nfqueue verdict that used reg */ - nfq = nf_queue_nf_hook_drop(net); - if (nfq) - synchronize_net(); - kvfree(p); +void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) +{ + if (reg->pf == NFPROTO_INET) { + __nf_unregister_net_hook(net, NFPROTO_IPV4, reg); + __nf_unregister_net_hook(net, NFPROTO_IPV6, reg); + } else { + __nf_unregister_net_hook(net, reg->pf, reg); + } } EXPORT_SYMBOL(nf_unregister_net_hook); +int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) +{ + int err; + + if (reg->pf == NFPROTO_INET) { + err = __nf_register_net_hook(net, NFPROTO_IPV4, reg); + if (err < 0) + return err; + + err = __nf_register_net_hook(net, NFPROTO_IPV6, reg); + if (err < 0) { + __nf_unregister_net_hook(net, NFPROTO_IPV4, reg); + return err; + } + } else { + err = __nf_register_net_hook(net, reg->pf, reg); + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL(nf_register_net_hook); + int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n) { @@ -395,63 +464,10 @@ EXPORT_SYMBOL(nf_register_net_hooks); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int hookcount) { - struct nf_hook_entries *to_free[16], *p; - struct nf_hook_entries __rcu **pp; - unsigned int i, j, n; - - mutex_lock(&nf_hook_mutex); - for (i = 0; i < hookcount; i++) { - pp = nf_hook_entry_head(net, ®[i]); - if (!pp) - continue; - - p = nf_entry_dereference(*pp); - if (WARN_ON_ONCE(!p)) - continue; - __nf_unregister_net_hook(p, ®[i]); - } - mutex_unlock(&nf_hook_mutex); - - do { - n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free)); - - mutex_lock(&nf_hook_mutex); - - for (i = 0, j = 0; i < hookcount && j < n; i++) { - pp = nf_hook_entry_head(net, ®[i]); - if (!pp) - continue; - - p = nf_entry_dereference(*pp); - if (!p) - continue; - - to_free[j] = __nf_hook_entries_try_shrink(pp); - if (to_free[j]) - ++j; - } - - mutex_unlock(&nf_hook_mutex); - - if (j) { - unsigned int nfq; - - synchronize_net(); - - /* need 2nd synchronize_net() if nfqueue is used, skb - * can get reinjected right before nf_queue_hook_drop() - */ - nfq = nf_queue_nf_hook_drop(net); - if (nfq) - synchronize_net(); - - for (i = 0; i < j; i++) - kvfree(to_free[i]); - } + unsigned int i; - reg += n; - hookcount -= n; - } while (hookcount > 0); + for (i = 0; i < hookcount; i++) + nf_unregister_net_hook(net, ®[i]); } EXPORT_SYMBOL(nf_unregister_net_hooks); @@ -569,14 +585,27 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif -static int __net_init netfilter_net_init(struct net *net) +static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max) { - int i, h; + int h; - for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { - for (h = 0; h < NF_MAX_HOOKS; h++) - RCU_INIT_POINTER(net->nf.hooks[i][h], NULL); - } + for (h = 0; h < max; h++) + RCU_INIT_POINTER(e[h], NULL); +} + +static int __net_init netfilter_net_init(struct net *net) +{ + __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4)); + __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6)); +#ifdef CONFIG_NETFILTER_FAMILY_ARP + __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp)); +#endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge)); +#endif +#if IS_ENABLED(CONFIG_DECNET) + __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet)); +#endif #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 5ca18f07683b..257ca393e6f2 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -127,14 +127,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (ret <= 0) return ret; - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, set))) - return 0; - if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(x, set), ext, mext, flags); - if (SET_WITH_SKBINFO(set)) - ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags); - return 1; + return ip_set_match_extensions(set, ext, mext, flags, x); } static int @@ -227,6 +220,7 @@ mtype_list(const struct ip_set *set, rcu_read_lock(); for (; cb->args[IPSET_CB_ARG0] < map->elements; cb->args[IPSET_CB_ARG0]++) { + cond_resched_rcu(); id = cb->args[IPSET_CB_ARG0]; x = get_ext(set, map, id); if (!test_bit(id, map->members) || diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index d8975a0b4282..488d6d05c65c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -263,12 +263,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip); if (ret) return ret; - if (first_ip > last_ip) { - u32 tmp = first_ip; - - first_ip = last_ip; - last_ip = tmp; - } + if (first_ip > last_ip) + swap(first_ip, last_ip); } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 4c279fbd2d5d..c00b6a2e8e3c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -337,12 +337,8 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip); if (ret) return ret; - if (first_ip > last_ip) { - u32 tmp = first_ip; - - first_ip = last_ip; - last_ip = tmp; - } + if (first_ip > last_ip) + swap(first_ip, last_ip); } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 7f9bbd7c98b5..b561ca8b3659 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -238,12 +238,8 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); - if (first_port > last_port) { - u16 tmp = first_port; - - first_port = last_port; - last_port = tmp; - } + if (first_port > last_port) + swap(first_port, last_port); elements = last_port - first_port + 1; set->dsize = ip_set_elem_len(set, tb, 0, 0); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index cf84f7b37cd9..975a85a48d39 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -57,7 +57,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex is held: */ #define ip_set_dereference(p) \ - rcu_dereference_protected(p, 1) + rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) #define ip_set(inst, id) \ ip_set_dereference((inst)->ip_set_list)[id] @@ -472,6 +472,31 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, } EXPORT_SYMBOL_GPL(ip_set_put_extensions); +bool +ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags, void *data) +{ + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(data, set))) + return false; + if (SET_WITH_COUNTER(set)) { + struct ip_set_counter *counter = ext_counter(data, set); + + if (flags & IPSET_FLAG_MATCH_COUNTERS && + !(ip_set_match_counter(ip_set_get_packets(counter), + mext->packets, mext->packets_op) && + ip_set_match_counter(ip_set_get_bytes(counter), + mext->bytes, mext->bytes_op))) + return false; + ip_set_update_counter(counter, ext, flags); + } + if (SET_WITH_SKBINFO(set)) + ip_set_get_skbinfo(ext_skbinfo(data, set), + ext, mext, flags); + return true; +} +EXPORT_SYMBOL_GPL(ip_set_match_extensions); + /* Creating/destroying/renaming/swapping affect the existence and * the properties of a set. All of these can be executed from userspace * only and serialized by the nfnl mutex indirectly from nfnetlink. @@ -1386,11 +1411,9 @@ dump_last: goto next_set; if (set->variant->uref) set->variant->uref(set, cb, true); - /* Fall through and add elements */ + /* fall through */ default: - rcu_read_lock_bh(); ret = set->variant->list(set, skb, cb); - rcu_read_unlock_bh(); if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; @@ -2055,6 +2078,7 @@ ip_set_net_exit(struct net *net) inst->is_deleted = true; /* flag for ip_set_nfnl_put */ + nfnl_lock(NFNL_SUBSYS_IPSET); for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); if (set) { @@ -2062,6 +2086,7 @@ ip_set_net_exit(struct net *net) ip_set_destroy_set(set); } } + nfnl_unlock(NFNL_SUBSYS_IPSET); kfree(rcu_dereference_protected(inst->ip_set_list, 1)); } @@ -2097,7 +2122,6 @@ ip_set_init(void) return ret; } - pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); return 0; } @@ -2113,3 +2137,5 @@ ip_set_fini(void) module_init(ip_set_init); module_exit(ip_set_fini); + +MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL)); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index efffc8eabafe..bbad940c0137 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -917,12 +917,9 @@ static inline int mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, struct ip_set_ext *mext, struct ip_set *set, u32 flags) { - if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(data, set), - ext, mext, flags); - if (SET_WITH_SKBINFO(set)) - ip_set_get_skbinfo(ext_skbinfo(data, set), - ext, mext, flags); + if (!ip_set_match_extensions(set, ext, mext, flags, data)) + return 0; + /* nomatch entries return -ENOTEMPTY */ return mtype_do_data_match(data); } @@ -941,9 +938,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct mtype_elem *data; #if IPSET_NET_COUNT == 2 struct mtype_elem orig = *d; - int i, j = 0, k; + int ret, i, j = 0, k; #else - int i, j = 0; + int ret, i, j = 0; #endif u32 key, multi = 0; @@ -969,18 +966,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; - if (SET_WITH_TIMEOUT(set)) { - if (!ip_set_timeout_expired( - ext_timeout(data, set))) - return mtype_data_match(data, ext, - mext, set, - flags); + ret = mtype_data_match(data, ext, mext, set, flags); + if (ret != 0) + return ret; #ifdef IP_SET_HASH_WITH_MULTI - multi = 0; + /* No match, reset multiple match flag */ + multi = 0; #endif - } else - return mtype_data_match(data, ext, - mext, set, flags); } #if IPSET_NET_COUNT == 2 } @@ -1027,12 +1019,11 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!test_bit(i, n->used)) continue; data = ahash_data(n, i, set->dsize); - if (mtype_data_equal(data, d, &multi) && - !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set)))) { - ret = mtype_data_match(data, ext, mext, set, flags); + if (!mtype_data_equal(data, d, &multi)) + continue; + ret = mtype_data_match(data, ext, mext, set, flags); + if (ret != 0) goto out; - } } out: return ret; @@ -1143,6 +1134,7 @@ mtype_list(const struct ip_set *set, rcu_read_lock(); for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); cb->args[IPSET_CB_ARG0]++) { + cond_resched_rcu(); incomplete = skb_tail_pointer(skb); n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0])); pr_debug("cb->arg bucket: %lu, t %p n %p\n", diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e864681b8dc5..072a658fde04 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -55,8 +55,9 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, struct ip_set_adt_opt *opt, const struct ip_set_ext *ext) { struct list_set *map = set->data; + struct ip_set_ext *mext = &opt->ext; struct set_elem *e; - u32 cmdflags = opt->cmdflags; + u32 flags = opt->cmdflags; int ret; /* Don't lookup sub-counters at all */ @@ -64,21 +65,11 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; list_for_each_entry_rcu(e, &map->members, list) { - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) - continue; ret = ip_set_test(e->id, skb, par, opt); - if (ret > 0) { - if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(e, set), - ext, &opt->ext, - cmdflags); - if (SET_WITH_SKBINFO(set)) - ip_set_get_skbinfo(ext_skbinfo(e, set), - ext, &opt->ext, - cmdflags); - return ret; - } + if (ret <= 0) + continue; + if (ip_set_match_extensions(set, ext, mext, flags, e)) + return 1; } return 0; } diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 299edc6add5a..1c98c907bc63 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -595,7 +595,6 @@ static int ip_vs_app_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_app_fops = { - .owner = THIS_MODULE, .open = ip_vs_app_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 3e053cb30070..370abbf6f421 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -322,7 +322,7 @@ ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs, { __be16 _ports[2], *pptr; - pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (pptr == NULL) return 1; @@ -1143,7 +1143,6 @@ static int ip_vs_conn_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_conn_fops = { - .owner = THIS_MODULE, .open = ip_vs_conn_open, .read = seq_read, .llseek = seq_lseek, @@ -1221,7 +1220,6 @@ static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_conn_sync_fops = { - .owner = THIS_MODULE, .open = ip_vs_conn_sync_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5cb7cac9177d..5f6f73cf2174 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -433,7 +433,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * IPv6 frags, only the first hit here. */ - pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (pptr == NULL) return NULL; @@ -566,7 +566,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct netns_ipvs *ipvs = svc->ipvs; struct net *net = ipvs->net; - pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (!pptr) return NF_DROP; dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0]; @@ -982,7 +982,7 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, unsigned int offset; *related = 1; - ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); + ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; @@ -1214,7 +1214,7 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, return NULL; pptr = frag_safe_skb_hp(skb, iph->len, - sizeof(_ports), _ports, iph); + sizeof(_ports), _ports); if (!pptr) return NULL; @@ -1407,7 +1407,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in __be16 _ports[2], *pptr; pptr = frag_safe_skb_hp(skb, iph.len, - sizeof(_ports), _ports, &iph); + sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr, @@ -1741,7 +1741,7 @@ static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, *related = 1; - ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph); + ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index fff213eacf2a..5ebde4b15810 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2116,7 +2116,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_info_fops = { - .owner = THIS_MODULE, .open = ip_vs_info_open, .read = seq_read, .llseek = seq_lseek, @@ -2161,7 +2160,6 @@ static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_stats_fops = { - .owner = THIS_MODULE, .open = ip_vs_stats_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -2230,7 +2228,6 @@ static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_stats_percpu_fops = { - .owner = THIS_MODULE, .open = ip_vs_stats_percpu_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 121a321b91be..bcd9b7bde4ee 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -315,6 +315,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); + /* fall through */ case CHECKSUM_COMPLETE: #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 30e11cd6aa8a..c15ef7c2a1fa 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -319,6 +319,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); + /* fall through */ case CHECKSUM_COMPLETE: #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c new file mode 100644 index 000000000000..6d65389e308f --- /dev/null +++ b/net/netfilter/nf_conncount.c @@ -0,0 +1,373 @@ +/* + * count the number of connections matching an arbitrary key. + * + * (C) 2017 Red Hat GmbH + * Author: Florian Westphal <fw@strlen.de> + * + * split from xt_connlimit.c: + * (c) 2000 Gerd Knorr <kraxel@bytesex.org> + * Nov 2002: Martin Bene <martin.bene@icomedias.com>: + * only ignore TIME_WAIT or gone connections + * (C) CC Computer Consultants GmbH, 2007 + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/jhash.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/netfilter/nf_conntrack_tcp.h> +#include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_count.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_zones.h> + +#define CONNCOUNT_SLOTS 256U + +#ifdef CONFIG_LOCKDEP +#define CONNCOUNT_LOCK_SLOTS 8U +#else +#define CONNCOUNT_LOCK_SLOTS 256U +#endif + +#define CONNCOUNT_GC_MAX_NODES 8 +#define MAX_KEYLEN 5 + +/* we will save the tuples of all connections we care about */ +struct nf_conncount_tuple { + struct hlist_node node; + struct nf_conntrack_tuple tuple; +}; + +struct nf_conncount_rb { + struct rb_node node; + struct hlist_head hhead; /* connections/hosts in same subnet */ + u32 key[MAX_KEYLEN]; +}; + +static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp; + +struct nf_conncount_data { + unsigned int keylen; + struct rb_root root[CONNCOUNT_SLOTS]; +}; + +static u_int32_t conncount_rnd __read_mostly; +static struct kmem_cache *conncount_rb_cachep __read_mostly; +static struct kmem_cache *conncount_conn_cachep __read_mostly; + +static inline bool already_closed(const struct nf_conn *conn) +{ + if (nf_ct_protonum(conn) == IPPROTO_TCP) + return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT || + conn->proto.tcp.state == TCP_CONNTRACK_CLOSE; + else + return false; +} + +static int key_diff(const u32 *a, const u32 *b, unsigned int klen) +{ + return memcmp(a, b, klen * sizeof(u32)); +} + +static bool add_hlist(struct hlist_head *head, + const struct nf_conntrack_tuple *tuple) +{ + struct nf_conncount_tuple *conn; + + conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); + if (conn == NULL) + return false; + conn->tuple = *tuple; + hlist_add_head(&conn->node, head); + return true; +} + +static unsigned int check_hlist(struct net *net, + struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone, + bool *addit) +{ + const struct nf_conntrack_tuple_hash *found; + struct nf_conncount_tuple *conn; + struct hlist_node *n; + struct nf_conn *found_ct; + unsigned int length = 0; + + *addit = true; + + /* check the saved connections */ + hlist_for_each_entry_safe(conn, n, head, node) { + found = nf_conntrack_find_get(net, zone, &conn->tuple); + if (found == NULL) { + hlist_del(&conn->node); + kmem_cache_free(conncount_conn_cachep, conn); + continue; + } + + found_ct = nf_ct_tuplehash_to_ctrack(found); + + if (nf_ct_tuple_equal(&conn->tuple, tuple)) { + /* + * Just to be sure we have it only once in the list. + * We should not see tuples twice unless someone hooks + * this into a table without "-p tcp --syn". + */ + *addit = false; + } else if (already_closed(found_ct)) { + /* + * we do not care about connections which are + * closed already -> ditch it + */ + nf_ct_put(found_ct); + hlist_del(&conn->node); + kmem_cache_free(conncount_conn_cachep, conn); + continue; + } + + nf_ct_put(found_ct); + length++; + } + + return length; +} + +static void tree_nodes_free(struct rb_root *root, + struct nf_conncount_rb *gc_nodes[], + unsigned int gc_count) +{ + struct nf_conncount_rb *rbconn; + + while (gc_count) { + rbconn = gc_nodes[--gc_count]; + rb_erase(&rbconn->node, root); + kmem_cache_free(conncount_rb_cachep, rbconn); + } +} + +static unsigned int +count_tree(struct net *net, struct rb_root *root, + const u32 *key, u8 keylen, + u8 family, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) +{ + struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; + struct rb_node **rbnode, *parent; + struct nf_conncount_rb *rbconn; + struct nf_conncount_tuple *conn; + unsigned int gc_count; + bool no_gc = false; + + restart: + gc_count = 0; + parent = NULL; + rbnode = &(root->rb_node); + while (*rbnode) { + int diff; + bool addit; + + rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node); + + parent = *rbnode; + diff = key_diff(key, rbconn->key, keylen); + if (diff < 0) { + rbnode = &((*rbnode)->rb_left); + } else if (diff > 0) { + rbnode = &((*rbnode)->rb_right); + } else { + /* same source network -> be counted! */ + unsigned int count; + count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit); + + tree_nodes_free(root, gc_nodes, gc_count); + if (!addit) + return count; + + if (!add_hlist(&rbconn->hhead, tuple)) + return 0; /* hotdrop */ + + return count + 1; + } + + if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) + continue; + + /* only used for GC on hhead, retval and 'addit' ignored */ + check_hlist(net, &rbconn->hhead, tuple, zone, &addit); + if (hlist_empty(&rbconn->hhead)) + gc_nodes[gc_count++] = rbconn; + } + + if (gc_count) { + no_gc = true; + tree_nodes_free(root, gc_nodes, gc_count); + /* tree_node_free before new allocation permits + * allocator to re-use newly free'd object. + * + * This is a rare event; in most cases we will find + * existing node to re-use. (or gc_count is 0). + */ + goto restart; + } + + /* no match, need to insert new node */ + rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); + if (rbconn == NULL) + return 0; + + conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); + if (conn == NULL) { + kmem_cache_free(conncount_rb_cachep, rbconn); + return 0; + } + + conn->tuple = *tuple; + memcpy(rbconn->key, key, sizeof(u32) * keylen); + + INIT_HLIST_HEAD(&rbconn->hhead); + hlist_add_head(&conn->node, &rbconn->hhead); + + rb_link_node(&rbconn->node, parent, rbnode); + rb_insert_color(&rbconn->node, root); + return 1; +} + +unsigned int nf_conncount_count(struct net *net, + struct nf_conncount_data *data, + const u32 *key, + unsigned int family, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) +{ + struct rb_root *root; + int count; + u32 hash; + + hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; + root = &data->root[hash]; + + spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + + count = count_tree(net, root, key, data->keylen, family, tuple, zone); + + spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); + + return count; +} +EXPORT_SYMBOL_GPL(nf_conncount_count); + +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, + unsigned int keylen) +{ + struct nf_conncount_data *data; + int ret, i; + + if (keylen % sizeof(u32) || + keylen / sizeof(u32) > MAX_KEYLEN || + keylen == 0) + return ERR_PTR(-EINVAL); + + net_get_random_once(&conncount_rnd, sizeof(conncount_rnd)); + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + ret = nf_ct_netns_get(net, family); + if (ret < 0) { + kfree(data); + return ERR_PTR(ret); + } + + for (i = 0; i < ARRAY_SIZE(data->root); ++i) + data->root[i] = RB_ROOT; + + data->keylen = keylen / sizeof(u32); + + return data; +} +EXPORT_SYMBOL_GPL(nf_conncount_init); + +static void destroy_tree(struct rb_root *r) +{ + struct nf_conncount_tuple *conn; + struct nf_conncount_rb *rbconn; + struct hlist_node *n; + struct rb_node *node; + + while ((node = rb_first(r)) != NULL) { + rbconn = rb_entry(node, struct nf_conncount_rb, node); + + rb_erase(node, r); + + hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) + kmem_cache_free(conncount_conn_cachep, conn); + + kmem_cache_free(conncount_rb_cachep, rbconn); + } +} + +void nf_conncount_destroy(struct net *net, unsigned int family, + struct nf_conncount_data *data) +{ + unsigned int i; + + nf_ct_netns_put(net, family); + + for (i = 0; i < ARRAY_SIZE(data->root); ++i) + destroy_tree(&data->root[i]); + + kfree(data); +} +EXPORT_SYMBOL_GPL(nf_conncount_destroy); + +static int __init nf_conncount_modinit(void) +{ + int i; + + BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS); + BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0); + + for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i) + spin_lock_init(&nf_conncount_locks[i]); + + conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", + sizeof(struct nf_conncount_tuple), + 0, 0, NULL); + if (!conncount_conn_cachep) + return -ENOMEM; + + conncount_rb_cachep = kmem_cache_create("nf_conncount_rb", + sizeof(struct nf_conncount_rb), + 0, 0, NULL); + if (!conncount_rb_cachep) { + kmem_cache_destroy(conncount_conn_cachep); + return -ENOMEM; + } + + return 0; +} + +static void __exit nf_conncount_modexit(void) +{ + kmem_cache_destroy(conncount_conn_cachep); + kmem_cache_destroy(conncount_rb_cachep); +} + +module_init(nf_conncount_modinit); +module_exit(nf_conncount_modexit); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("netfilter: count number of connections matching a key"); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 4efaa3066c78..705198de671d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -58,8 +58,6 @@ #include "nf_internals.h" -#define NF_CONNTRACK_VERSION "0.5.0" - int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) __read_mostly; @@ -901,6 +899,9 @@ static unsigned int early_drop_list(struct net *net, hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); + if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) + continue; + if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); continue; @@ -975,6 +976,18 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) return false; } +#define DAY (86400 * HZ) + +/* Set an arbitrary timeout large enough not to ever expire, this save + * us a check for the IPS_OFFLOAD_BIT from the packet path via + * nf_ct_is_expired(). + */ +static void nf_ct_offload_timeout(struct nf_conn *ct) +{ + if (nf_ct_expires(ct) < DAY / 2) + ct->timeout = nfct_time_stamp + DAY; +} + static void gc_worker(struct work_struct *work) { unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); @@ -1011,6 +1024,11 @@ static void gc_worker(struct work_struct *work) tmp = nf_ct_tuplehash_to_ctrack(h); scanned++; + if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { + nf_ct_offload_timeout(tmp); + continue; + } + if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); expired_count++; @@ -2048,10 +2066,6 @@ int nf_conntrack_init_start(void) if (!nf_conntrack_cachep) goto err_cachep; - printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", - NF_CONNTRACK_VERSION, nf_conntrack_htable_size, - nf_conntrack_max); - ret = nf_conntrack_expect_init(); if (ret < 0) goto err_expect; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index d6748a8a79c5..8ef21d9f9a00 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -649,7 +649,6 @@ static int exp_open(struct inode *inode, struct file *file) } static const struct file_operations exp_file_ops = { - .owner = THIS_MODULE, .open = exp_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index dc6347342e34..1601275efe2d 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -1,4 +1,4 @@ -/**************************************************************************** +/* * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323 * conntrack/NAT module. * @@ -8,7 +8,7 @@ * * See ip_conntrack_helper_h323_asn1.h for details. * - ****************************************************************************/ + */ #ifdef __KERNEL__ #include <linux/kernel.h> @@ -140,14 +140,15 @@ static const decoder_t Decoders[] = { decode_choice, }; -/**************************************************************************** +/* * H.323 Types - ****************************************************************************/ + */ #include "nf_conntrack_h323_types.c" -/**************************************************************************** +/* * Functions - ****************************************************************************/ + */ + /* Assume bs is aligned && v < 16384 */ static unsigned int get_len(struct bitstr *bs) { @@ -177,7 +178,6 @@ static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits) return 0; } -/****************************************************************************/ static unsigned int get_bit(struct bitstr *bs) { unsigned int b = (*bs->cur) & (0x80 >> bs->bit); @@ -187,7 +187,6 @@ static unsigned int get_bit(struct bitstr *bs) return b; } -/****************************************************************************/ /* Assume b <= 8 */ static unsigned int get_bits(struct bitstr *bs, unsigned int b) { @@ -213,7 +212,6 @@ static unsigned int get_bits(struct bitstr *bs, unsigned int b) return v; } -/****************************************************************************/ /* Assume b <= 32 */ static unsigned int get_bitmap(struct bitstr *bs, unsigned int b) { @@ -251,9 +249,9 @@ static unsigned int get_bitmap(struct bitstr *bs, unsigned int b) return v; } -/**************************************************************************** +/* * Assume bs is aligned and sizeof(unsigned int) == 4 - ****************************************************************************/ + */ static unsigned int get_uint(struct bitstr *bs, int b) { unsigned int v = 0; @@ -262,12 +260,15 @@ static unsigned int get_uint(struct bitstr *bs, int b) case 4: v |= *bs->cur++; v <<= 8; + /* fall through */ case 3: v |= *bs->cur++; v <<= 8; + /* fall through */ case 2: v |= *bs->cur++; v <<= 8; + /* fall through */ case 1: v |= *bs->cur++; break; @@ -275,7 +276,6 @@ static unsigned int get_uint(struct bitstr *bs, int b) return v; } -/****************************************************************************/ static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -284,7 +284,6 @@ static int decode_nul(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -296,7 +295,6 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -316,7 +314,6 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -364,7 +361,6 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -381,7 +377,6 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -418,7 +413,6 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -439,7 +433,6 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -493,7 +486,6 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -523,7 +515,6 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -653,7 +644,6 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -750,8 +740,6 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } - -/****************************************************************************/ static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level) { @@ -833,7 +821,6 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; } -/****************************************************************************/ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras) { static const struct field_t ras_message = { @@ -849,7 +836,6 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras) return decode_choice(&bs, &ras_message, (char *) ras, 0); } -/****************************************************************************/ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg, size_t sz, H323_UserInformation *uuie) { @@ -867,7 +853,6 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg, return decode_seq(&bs, &h323_userinformation, (char *) uuie, 0); } -/****************************************************************************/ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz, MultimediaSystemControlMessage * mscm) @@ -886,7 +871,6 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz, (char *) mscm, 0); } -/****************************************************************************/ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931) { unsigned char *p = buf; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index f71f0d2558fd..005589c6d0f6 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -24,6 +24,7 @@ #include <linux/skbuff.h> #include <net/route.h> #include <net/ip6_route.h> +#include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -115,7 +116,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245; static struct nf_conntrack_helper nf_conntrack_helper_q931[]; static struct nf_conntrack_helper nf_conntrack_helper_ras[]; -/****************************************************************************/ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int *datalen, int *dataoff) @@ -219,7 +219,6 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, return 0; } -/****************************************************************************/ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, H245_TransportAddress *taddr, union nf_inet_addr *addr, __be16 *port) @@ -254,7 +253,6 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data, return 1; } -/****************************************************************************/ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -328,7 +326,6 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, return ret; } -/****************************************************************************/ static int expect_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -380,7 +377,6 @@ static int expect_t120(struct sk_buff *skb, return ret; } -/****************************************************************************/ static int process_h245_channel(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -410,7 +406,6 @@ static int process_h245_channel(struct sk_buff *skb, return 0; } -/****************************************************************************/ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -472,7 +467,6 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, unsigned char **data, int dataoff, @@ -542,7 +536,6 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, unsigned char **data, int dataoff, @@ -578,7 +571,6 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int h245_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -628,7 +620,6 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, return NF_DROP; } -/****************************************************************************/ static const struct nf_conntrack_expect_policy h245_exp_policy = { .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */, .timeout = 240, @@ -643,7 +634,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .expect_policy = &h245_exp_policy, }; -/****************************************************************************/ int get_h225_addr(struct nf_conn *ct, unsigned char *data, TransportAddress *taddr, union nf_inet_addr *addr, __be16 *port) @@ -675,7 +665,6 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, return 1; } -/****************************************************************************/ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, unsigned char **data, int dataoff, @@ -726,20 +715,15 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, } /* If the calling party is on the same side of the forward-to party, - * we don't need to track the second call */ + * we don't need to track the second call + */ static int callforward_do_filter(struct net *net, const union nf_inet_addr *src, const union nf_inet_addr *dst, u_int8_t family) { - const struct nf_afinfo *afinfo; int ret = 0; - /* rcu_read_lock()ed by nf_hook_thresh */ - afinfo = nf_get_afinfo(family); - if (!afinfo) - return 0; - switch (family) { case AF_INET: { struct flowi4 fl1, fl2; @@ -750,10 +734,10 @@ static int callforward_do_filter(struct net *net, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; - if (!afinfo->route(net, (struct dst_entry **)&rt1, - flowi4_to_flowi(&fl1), false)) { - if (!afinfo->route(net, (struct dst_entry **)&rt2, - flowi4_to_flowi(&fl2), false)) { + if (!nf_ip_route(net, (struct dst_entry **)&rt1, + flowi4_to_flowi(&fl1), false)) { + if (!nf_ip_route(net, (struct dst_entry **)&rt2, + flowi4_to_flowi(&fl2), false)) { if (rt_nexthop(rt1, fl1.daddr) == rt_nexthop(rt2, fl2.daddr) && rt1->dst.dev == rt2->dst.dev) @@ -766,18 +750,23 @@ static int callforward_do_filter(struct net *net, } #if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) case AF_INET6: { - struct flowi6 fl1, fl2; + const struct nf_ipv6_ops *v6ops; struct rt6_info *rt1, *rt2; + struct flowi6 fl1, fl2; + + v6ops = nf_get_ipv6_ops(); + if (!v6ops) + return 0; memset(&fl1, 0, sizeof(fl1)); fl1.daddr = src->in6; memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->in6; - if (!afinfo->route(net, (struct dst_entry **)&rt1, - flowi6_to_flowi(&fl1), false)) { - if (!afinfo->route(net, (struct dst_entry **)&rt2, - flowi6_to_flowi(&fl2), false)) { + if (!v6ops->route(net, (struct dst_entry **)&rt1, + flowi6_to_flowi(&fl1), false)) { + if (!v6ops->route(net, (struct dst_entry **)&rt2, + flowi6_to_flowi(&fl2), false)) { if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr), rt6_nexthop(rt2, &fl2.daddr)) && rt1->dst.dev == rt2->dst.dev) @@ -794,7 +783,6 @@ static int callforward_do_filter(struct net *net, } -/****************************************************************************/ static int expect_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -815,7 +803,8 @@ static int expect_callforwarding(struct sk_buff *skb, return 0; /* If the calling party is on the same side of the forward-to party, - * we don't need to track the second call */ + * we don't need to track the second call + */ if (callforward_filter && callforward_do_filter(net, &addr, &ct->tuplehash[!dir].tuple.src.u3, nf_ct_l3num(ct))) { @@ -854,7 +843,6 @@ static int expect_callforwarding(struct sk_buff *skb, return ret; } -/****************************************************************************/ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -925,7 +913,6 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_callproceeding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -958,7 +945,6 @@ static int process_callproceeding(struct sk_buff *skb, return 0; } -/****************************************************************************/ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -990,7 +976,6 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1022,7 +1007,6 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1063,7 +1047,6 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1095,7 +1078,6 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, unsigned char **data, int dataoff, @@ -1154,7 +1136,6 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int q931_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -1203,7 +1184,6 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, return NF_DROP; } -/****************************************************************************/ static const struct nf_conntrack_expect_policy q931_exp_policy = { /* T.120 and H.245 */ .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4, @@ -1231,7 +1211,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { }, }; -/****************************************************************************/ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, int *datalen) { @@ -1249,7 +1228,6 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); } -/****************************************************************************/ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, union nf_inet_addr *addr, __be16 port) @@ -1270,7 +1248,6 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, return NULL; } -/****************************************************************************/ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, unsigned char **data, @@ -1328,7 +1305,6 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, return ret; } -/****************************************************************************/ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1346,7 +1322,6 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1391,7 +1366,6 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, return ret; } -/****************************************************************************/ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1428,7 +1402,6 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1480,7 +1453,6 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1514,7 +1486,6 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1559,7 +1530,6 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1608,7 +1578,6 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, return ret; } -/****************************************************************************/ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1626,7 +1595,6 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1666,7 +1634,6 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, return ret; } -/****************************************************************************/ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1700,7 +1667,6 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int process_ras(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, @@ -1745,7 +1711,6 @@ static int process_ras(struct sk_buff *skb, struct nf_conn *ct, return 0; } -/****************************************************************************/ static int ras_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -1788,7 +1753,6 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, return NF_DROP; } -/****************************************************************************/ static const struct nf_conntrack_expect_policy ras_exp_policy = { .max_expected = 32, .timeout = 240, @@ -1849,7 +1813,6 @@ static void __exit h323_helper_exit(void) nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); } -/****************************************************************************/ static void __exit nf_conntrack_h323_fini(void) { h323_helper_exit(); @@ -1857,7 +1820,6 @@ static void __exit nf_conntrack_h323_fini(void) pr_debug("nf_ct_h323: fini\n"); } -/****************************************************************************/ static int __init nf_conntrack_h323_init(void) { int ret; @@ -1877,7 +1839,6 @@ err1: return ret; } -/****************************************************************************/ module_init(nf_conntrack_h323_init); module_exit(nf_conntrack_h323_fini); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 382d49792f42..dd177ebee9aa 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -57,8 +57,6 @@ MODULE_LICENSE("GPL"); -static char __initdata version[] = "0.93"; - static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *l4proto) @@ -544,7 +542,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct) len *= 3u; /* ORIG, REPLY, MASTER */ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - len += l4proto->nla_size; + len += l4proto->nlattr_size; if (l4proto->nlattr_tuple_size) { len4 = l4proto->nlattr_tuple_size(); len4 *= 3u; /* ORIG, REPLY, MASTER */ @@ -1110,6 +1108,14 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { .len = NF_CT_LABELS_MAX_SIZE }, }; +static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) +{ + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + return 0; + + return ctnetlink_filter_match(ct, data); +} + static int ctnetlink_flush_conntrack(struct net *net, const struct nlattr * const cda[], u32 portid, int report) @@ -1122,7 +1128,7 @@ static int ctnetlink_flush_conntrack(struct net *net, return PTR_ERR(filter); } - nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter, + nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter, portid, report); kfree(filter); @@ -1168,6 +1174,11 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, ct = nf_ct_tuplehash_to_ctrack(h); + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { + nf_ct_put(ct); + return -EBUSY; + } + if (cda[CTA_ID]) { u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); if (id != (u32)(unsigned long)ct) { @@ -3412,7 +3423,6 @@ static int __init ctnetlink_init(void) { int ret; - pr_info("ctnetlink v%s: registering with nfnetlink.\n", version); ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { pr_err("ctnetlink_init: cannot register with nfnetlink.\n"); @@ -3446,8 +3456,6 @@ err_out: static void __exit ctnetlink_exit(void) { - pr_info("ctnetlink: unregistering from nfnetlink.\n"); - unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index c8e9c9503a08..afdeca53e88b 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -385,14 +385,14 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto) { int ret = 0; if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos)) return -EBUSY; - if ((l4proto->to_nlattr && !l4proto->nlattr_size) || + if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) return -EINVAL; @@ -428,10 +428,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) goto out_unlock; } - l4proto->nla_size = 0; - if (l4proto->nlattr_size) - l4proto->nla_size += l4proto->nlattr_size(); - rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto); out_unlock: @@ -502,7 +498,7 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], +int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], unsigned int num_proto) { int ret = -EINVAL, ver; @@ -524,7 +520,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *const l4proto[], + const struct nf_conntrack_l4proto *const l4proto[], unsigned int num_proto) { int ret = -EINVAL; @@ -545,7 +541,7 @@ int nf_ct_l4proto_pernet_register(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], +void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], unsigned int num_proto) { mutex_lock(&nf_ct_proto_mutex); @@ -555,12 +551,12 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], synchronize_net(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_destroy(kill_l4proto, l4proto); + nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *const l4proto[], + const struct nf_conntrack_l4proto *const l4proto[], unsigned int num_proto) { while (num_proto-- != 0) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 2a446f4a554c..abe647d5b8c6 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -654,6 +654,12 @@ static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC }, }; +#define DCCP_NLATTR_SIZE ( \ + NLA_ALIGN(NLA_HDRLEN + 1) + \ + NLA_ALIGN(NLA_HDRLEN + 1) + \ + NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \ + NLA_ALIGN(NLA_HDRLEN + 0)) + static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) { struct nlattr *attr = cda[CTA_PROTOINFO_DCCP]; @@ -691,13 +697,6 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) spin_unlock_bh(&ct->lock); return 0; } - -static int dccp_nlattr_size(void) -{ - return nla_total_size(0) /* CTA_PROTOINFO_DCCP */ - + nla_policy_len(dccp_nla_policy, CTA_PROTOINFO_DCCP_MAX + 1); -} - #endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) @@ -862,7 +861,7 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.dccp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = { .l3proto = AF_INET, .l4proto = IPPROTO_DCCP, .pkt_to_tuple = dccp_pkt_to_tuple, @@ -876,8 +875,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .print_conntrack = dccp_print_conntrack, #endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_size = DCCP_NLATTR_SIZE, .to_nlattr = dccp_to_nlattr, - .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, @@ -898,7 +897,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = { .l3proto = AF_INET6, .l4proto = IPPROTO_DCCP, .pkt_to_tuple = dccp_pkt_to_tuple, @@ -912,8 +911,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .print_conntrack = dccp_print_conntrack, #endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_size = DCCP_NLATTR_SIZE, .to_nlattr = dccp_to_nlattr, - .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 1f86ddf6649a..6c6896d21cd7 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -12,7 +12,7 @@ #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack_l4proto.h> -static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; +static const unsigned int nf_ct_generic_timeout = 600*HZ; static bool nf_generic_should_process(u8 proto) { @@ -163,7 +163,7 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.generic.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = { .l3proto = PF_UNSPEC, .l4proto = 255, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a2503005d80b..d049ea5a3770 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -48,7 +48,7 @@ enum grep_conntrack { GRE_CT_MAX }; -static unsigned int gre_timeouts[GRE_CT_MAX] = { +static const unsigned int gre_timeouts[GRE_CT_MAX] = { [GRE_CT_UNREPLIED] = 30*HZ, [GRE_CT_REPLIED] = 180*HZ, }; @@ -352,7 +352,7 @@ static int gre_init_net(struct net *net, u_int16_t proto) } /* protocol helper struct */ -static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { +static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { .l3proto = AF_INET, .l4proto = IPPROTO_GRE, .pkt_to_tuple = gre_pkt_to_tuple, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 80faf04ddf15..fb9a35d16069 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -52,7 +52,7 @@ static const char *const sctp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = { +static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { [SCTP_CONNTRACK_CLOSED] = 10 SECS, [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, @@ -578,6 +578,11 @@ static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = { [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 }, }; +#define SCTP_NLATTR_SIZE ( \ + NLA_ALIGN(NLA_HDRLEN + 1) + \ + NLA_ALIGN(NLA_HDRLEN + 4) + \ + NLA_ALIGN(NLA_HDRLEN + 4)) + static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) { struct nlattr *attr = cda[CTA_PROTOINFO_SCTP]; @@ -608,12 +613,6 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) return 0; } - -static int sctp_nlattr_size(void) -{ - return nla_total_size(0) /* CTA_PROTOINFO_SCTP */ - + nla_policy_len(sctp_nla_policy, CTA_PROTOINFO_SCTP_MAX + 1); -} #endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) @@ -778,7 +777,7 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.sctp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, .pkt_to_tuple = sctp_pkt_to_tuple, @@ -793,8 +792,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .can_early_drop = sctp_can_early_drop, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_size = SCTP_NLATTR_SIZE, .to_nlattr = sctp_to_nlattr, - .nlattr_size = sctp_nlattr_size, .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, @@ -815,7 +814,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, .pkt_to_tuple = sctp_pkt_to_tuple, @@ -830,8 +829,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .can_early_drop = sctp_can_early_drop, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_size = SCTP_NLATTR_SIZE, .to_nlattr = sctp_to_nlattr, - .nlattr_size = sctp_nlattr_size, .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 37ef35b861f2..e97cdc1cf98c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -68,7 +68,7 @@ static const char *const tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = { +static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = { [TCP_CONNTRACK_SYN_SENT] = 2 MINS, [TCP_CONNTRACK_SYN_RECV] = 60 SECS, [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS, @@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + return; + seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); } #endif @@ -1222,6 +1225,12 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; +#define TCP_NLATTR_SIZE ( \ + NLA_ALIGN(NLA_HDRLEN + 1) + \ + NLA_ALIGN(NLA_HDRLEN + 1) + \ + NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags))) + \ + NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags)))) + static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) { struct nlattr *pattr = cda[CTA_PROTOINFO_TCP]; @@ -1274,12 +1283,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) return 0; } -static int tcp_nlattr_size(void) -{ - return nla_total_size(0) /* CTA_PROTOINFO_TCP */ - + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1); -} - static unsigned int tcp_nlattr_tuple_size(void) { static unsigned int size __read_mostly; @@ -1541,7 +1544,7 @@ static struct nf_proto_net *tcp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.tcp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = { .l3proto = PF_INET, .l4proto = IPPROTO_TCP, @@ -1557,11 +1560,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .can_early_drop = tcp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, - .nlattr_size = tcp_nlattr_size, .from_nlattr = nlattr_to_tcp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nlattr_tuple_size = tcp_nlattr_tuple_size, + .nlattr_size = TCP_NLATTR_SIZE, .nla_policy = nf_ct_port_nla_policy, #endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) @@ -1579,7 +1582,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_TCP, @@ -1594,8 +1597,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .error = tcp_error, .can_early_drop = tcp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .nlattr_size = TCP_NLATTR_SIZE, .to_nlattr = tcp_to_nlattr, - .nlattr_size = tcp_nlattr_size, .from_nlattr = nlattr_to_tcp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 3a5f727103af..fe7243970aa4 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -26,7 +26,7 @@ #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> -static unsigned int udp_timeouts[UDP_CT_MAX] = { +static const unsigned int udp_timeouts[UDP_CT_MAX] = { [UDP_CT_UNREPLIED] = 30*HZ, [UDP_CT_REPLIED] = 180*HZ, }; @@ -296,7 +296,7 @@ static struct nf_proto_net *udp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.udp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = { .l3proto = PF_INET, .l4proto = IPPROTO_UDP, @@ -328,7 +328,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); #ifdef CONFIG_NF_CT_PROTO_UDPLITE -struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 = { .l3proto = PF_INET, .l4proto = IPPROTO_UDPLITE, @@ -360,7 +360,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); #endif -struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDP, @@ -392,7 +392,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); #ifdef CONFIG_NF_CT_PROTO_UDPLITE -struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDPLITE, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5a101caa3e12..9123fdec5e14 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *s, void *v) WARN_ON(!l4proto); ret = -ENOSPC; - seq_printf(s, "%-8s %u %-8s %u %ld ", + seq_printf(s, "%-8s %u %-8s %u ", l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), - l4proto_name(l4proto->l4proto), nf_ct_protonum(ct), - nf_ct_expires(ct) / HZ); + l4proto_name(l4proto->l4proto), nf_ct_protonum(ct)); + + if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) + seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) l4proto->print_conntrack(s, ct); @@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) goto release; - if (test_bit(IPS_ASSURED_BIT, &ct->status)) + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) + seq_puts(s, "[OFFLOAD] "); + else if (test_bit(IPS_ASSURED_BIT, &ct->status)) seq_puts(s, "[ASSURED] "); if (seq_has_overflowed(s)) @@ -378,7 +382,6 @@ static int ct_open(struct inode *inode, struct file *file) } static const struct file_operations ct_file_ops = { - .owner = THIS_MODULE, .open = ct_open, .read = seq_read, .llseek = seq_lseek, @@ -471,7 +474,6 @@ static int ct_cpu_seq_open(struct inode *inode, struct file *file) } static const struct file_operations ct_cpu_seq_fops = { - .owner = THIS_MODULE, .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c new file mode 100644 index 000000000000..2f5099cb85b8 --- /dev/null +++ b/net/netfilter/nf_flow_table.c @@ -0,0 +1,429 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/rhashtable.h> +#include <linux/netdevice.h> +#include <net/netfilter/nf_flow_table.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_tuple.h> + +struct flow_offload_entry { + struct flow_offload flow; + struct nf_conn *ct; + struct rcu_head rcu_head; +}; + +struct flow_offload * +flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) +{ + struct flow_offload_entry *entry; + struct flow_offload *flow; + + if (unlikely(nf_ct_is_dying(ct) || + !atomic_inc_not_zero(&ct->ct_general.use))) + return NULL; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + goto err_ct_refcnt; + + flow = &entry->flow; + + if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst)) + goto err_dst_cache_original; + + if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst)) + goto err_dst_cache_reply; + + entry->ct = ct; + + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) { + case NFPROTO_IPV4: + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in; + break; + case NFPROTO_IPV6: + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6; + break; + } + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache = + route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache = + route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port = + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir = + FLOW_OFFLOAD_DIR_ORIGINAL; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir = + FLOW_OFFLOAD_DIR_REPLY; + + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx = + route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; + flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx = + route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx = + route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; + flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx = + route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; + + if (ct->status & IPS_SRC_NAT) + flow->flags |= FLOW_OFFLOAD_SNAT; + else if (ct->status & IPS_DST_NAT) + flow->flags |= FLOW_OFFLOAD_DNAT; + + return flow; + +err_dst_cache_reply: + dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst); +err_dst_cache_original: + kfree(entry); +err_ct_refcnt: + nf_ct_put(ct); + + return NULL; +} +EXPORT_SYMBOL_GPL(flow_offload_alloc); + +void flow_offload_free(struct flow_offload *flow) +{ + struct flow_offload_entry *e; + + dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); + dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); + e = container_of(flow, struct flow_offload_entry, flow); + kfree(e); +} +EXPORT_SYMBOL_GPL(flow_offload_free); + +void flow_offload_dead(struct flow_offload *flow) +{ + flow->flags |= FLOW_OFFLOAD_DYING; +} +EXPORT_SYMBOL_GPL(flow_offload_dead); + +int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) +{ + flow->timeout = (u32)jiffies; + + rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, + *flow_table->type->params); + rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, + *flow_table->type->params); + return 0; +} +EXPORT_SYMBOL_GPL(flow_offload_add); + +void flow_offload_del(struct nf_flowtable *flow_table, + struct flow_offload *flow) +{ + struct flow_offload_entry *e; + + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, + *flow_table->type->params); + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, + *flow_table->type->params); + + e = container_of(flow, struct flow_offload_entry, flow); + kfree_rcu(e, rcu_head); +} +EXPORT_SYMBOL_GPL(flow_offload_del); + +struct flow_offload_tuple_rhash * +flow_offload_lookup(struct nf_flowtable *flow_table, + struct flow_offload_tuple *tuple) +{ + return rhashtable_lookup_fast(&flow_table->rhashtable, tuple, + *flow_table->type->params); +} +EXPORT_SYMBOL_GPL(flow_offload_lookup); + +static void nf_flow_release_ct(const struct flow_offload *flow) +{ + struct flow_offload_entry *e; + + e = container_of(flow, struct flow_offload_entry, flow); + nf_ct_delete(e->ct, 0, 0); + nf_ct_put(e->ct); +} + +int nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct rhashtable_iter hti; + struct flow_offload *flow; + int err; + + err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); + if (err) + return err; + + rhashtable_walk_start(&hti); + + while ((tuplehash = rhashtable_walk_next(&hti))) { + if (IS_ERR(tuplehash)) { + err = PTR_ERR(tuplehash); + if (err != -EAGAIN) + goto out; + + continue; + } + if (tuplehash->tuple.dir) + continue; + + flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + + iter(flow, data); + } +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + return err; +} +EXPORT_SYMBOL_GPL(nf_flow_table_iterate); + +static inline bool nf_flow_has_expired(const struct flow_offload *flow) +{ + return (__s32)(flow->timeout - (u32)jiffies) <= 0; +} + +static inline bool nf_flow_is_dying(const struct flow_offload *flow) +{ + return flow->flags & FLOW_OFFLOAD_DYING; +} + +void nf_flow_offload_work_gc(struct work_struct *work) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table; + struct rhashtable_iter hti; + struct flow_offload *flow; + int err; + + flow_table = container_of(work, struct nf_flowtable, gc_work.work); + + err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); + if (err) + goto schedule; + + rhashtable_walk_start(&hti); + + while ((tuplehash = rhashtable_walk_next(&hti))) { + if (IS_ERR(tuplehash)) { + err = PTR_ERR(tuplehash); + if (err != -EAGAIN) + goto out; + + continue; + } + if (tuplehash->tuple.dir) + continue; + + flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + + if (nf_flow_has_expired(flow) || + nf_flow_is_dying(flow)) { + flow_offload_del(flow_table, flow); + nf_flow_release_ct(flow); + } + } +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); +schedule: + queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); +} +EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc); + +static u32 flow_offload_hash(const void *data, u32 len, u32 seed) +{ + const struct flow_offload_tuple *tuple = data; + + return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); +} + +static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct flow_offload_tuple_rhash *tuplehash = data; + + return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); +} + +static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct flow_offload_tuple *tuple = arg->key; + const struct flow_offload_tuple_rhash *x = ptr; + + if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) + return 1; + + return 0; +} + +const struct rhashtable_params nf_flow_offload_rhash_params = { + .head_offset = offsetof(struct flow_offload_tuple_rhash, node), + .hashfn = flow_offload_hash, + .obj_hashfn = flow_offload_hash_obj, + .obj_cmpfn = flow_offload_hash_cmp, + .automatic_shrinking = true, +}; +EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params); + +static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) +{ + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || + skb_try_make_writable(skb, thoff + sizeof(*tcph))) + return -1; + + tcph = (void *)(skb_network_header(skb) + thoff); + inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); + + return 0; +} + +static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) +{ + struct udphdr *udph; + + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || + skb_try_make_writable(skb, thoff + sizeof(*udph))) + return -1; + + udph = (void *)(skb_network_header(skb) + thoff); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace2(&udph->check, skb, port, + new_port, true); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + return 0; +} + +static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, + u8 protocol, __be16 port, __be16 new_port) +{ + switch (protocol) { + case IPPROTO_TCP: + if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) + return NF_DROP; + break; + case IPPROTO_UDP: + if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) + return NF_DROP; + break; + } + + return 0; +} + +int nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir) +{ + struct flow_ports *hdr; + __be16 port, new_port; + + if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || + skb_try_make_writable(skb, thoff + sizeof(*hdr))) + return -1; + + hdr = (void *)(skb_network_header(skb) + thoff); + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + port = hdr->source; + new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; + hdr->source = new_port; + break; + case FLOW_OFFLOAD_DIR_REPLY: + port = hdr->dest; + new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; + hdr->dest = new_port; + break; + default: + return -1; + } + + return nf_flow_nat_port(skb, thoff, protocol, port, new_port); +} +EXPORT_SYMBOL_GPL(nf_flow_snat_port); + +int nf_flow_dnat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir) +{ + struct flow_ports *hdr; + __be16 port, new_port; + + if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || + skb_try_make_writable(skb, thoff + sizeof(*hdr))) + return -1; + + hdr = (void *)(skb_network_header(skb) + thoff); + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + port = hdr->dest; + new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; + hdr->dest = new_port; + break; + case FLOW_OFFLOAD_DIR_REPLY: + port = hdr->source; + new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; + hdr->source = new_port; + break; + default: + return -1; + } + + return nf_flow_nat_port(skb, thoff, protocol, port, new_port); +} +EXPORT_SYMBOL_GPL(nf_flow_dnat_port); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c new file mode 100644 index 000000000000..281209aeba8f --- /dev/null +++ b/net/netfilter/nf_flow_table_inet.c @@ -0,0 +1,48 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/rhashtable.h> +#include <net/netfilter/nf_flow_table.h> +#include <net/netfilter/nf_tables.h> + +static unsigned int +nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + return nf_flow_offload_ip_hook(priv, skb, state); + case htons(ETH_P_IPV6): + return nf_flow_offload_ipv6_hook(priv, skb, state); + } + + return NF_ACCEPT; +} + +static struct nf_flowtable_type flowtable_inet = { + .family = NFPROTO_INET, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, + .hook = nf_flow_offload_inet_hook, + .owner = THIS_MODULE, +}; + +static int __init nf_flow_inet_module_init(void) +{ + nft_register_flowtable_type(&flowtable_inet); + + return 0; +} + +static void __exit nf_flow_inet_module_exit(void) +{ + nft_unregister_flowtable_type(&flowtable_inet); +} + +module_init(nf_flow_inet_module_init); +module_exit(nf_flow_inet_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */ diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 44284cd2528d..18f6d7ae995b 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -10,7 +10,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, const struct nf_hook_entries *entries, unsigned int index, unsigned int verdict); -unsigned int nf_queue_nf_hook_drop(struct net *net); +void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ int __init netfilter_log_init(void); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8bb152a7cca4..c2c1b16b7538 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -402,7 +402,6 @@ static int nflog_open(struct inode *inode, struct file *file) } static const struct file_operations nflog_file_ops = { - .owner = THIS_MODULE, .open = nflog_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index f7e21953b1de..d67a96a25a68 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -10,6 +10,8 @@ #include <linux/proc_fs.h> #include <linux/skbuff.h> #include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> @@ -96,30 +98,56 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); -unsigned int nf_queue_nf_hook_drop(struct net *net) +void nf_queue_nf_hook_drop(struct net *net) { const struct nf_queue_handler *qh; - unsigned int count = 0; rcu_read_lock(); qh = rcu_dereference(net->nf.queue_handler); if (qh) - count = qh->nf_hook_drop(net); + qh->nf_hook_drop(net); rcu_read_unlock(); - - return count; } EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop); +static void nf_ip_saveroute(const struct sk_buff *skb, + struct nf_queue_entry *entry) +{ + struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); + + if (entry->state.hook == NF_INET_LOCAL_OUT) { + const struct iphdr *iph = ip_hdr(skb); + + rt_info->tos = iph->tos; + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + rt_info->mark = skb->mark; + } +} + +static void nf_ip6_saveroute(const struct sk_buff *skb, + struct nf_queue_entry *entry) +{ + struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); + + if (entry->state.hook == NF_INET_LOCAL_OUT) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + rt_info->mark = skb->mark; + } +} + static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, const struct nf_hook_entries *entries, unsigned int index, unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; - const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; struct net *net = state->net; + unsigned int route_key_size; /* QUEUE == DROP if no one is waiting, to be safe. */ qh = rcu_dereference(net->nf.queue_handler); @@ -128,11 +156,19 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, goto err; } - afinfo = nf_get_afinfo(state->pf); - if (!afinfo) - goto err; + switch (state->pf) { + case AF_INET: + route_key_size = sizeof(struct ip_rt_info); + break; + case AF_INET6: + route_key_size = sizeof(struct ip6_rt_info); + break; + default: + route_key_size = 0; + break; + } - entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); + entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC); if (!entry) { status = -ENOMEM; goto err; @@ -142,12 +178,21 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, .skb = skb, .state = *state, .hook_index = index, - .size = sizeof(*entry) + afinfo->route_key_size, + .size = sizeof(*entry) + route_key_size, }; nf_queue_entry_get_refs(entry); skb_dst_force(skb); - afinfo->saveroute(skb, entry); + + switch (entry->state.pf) { + case AF_INET: + nf_ip_saveroute(skb, entry); + break; + case AF_INET6: + nf_ip6_saveroute(skb, entry); + break; + } + status = qh->outfn(entry, queuenum); if (status < 0) { @@ -204,13 +249,31 @@ repeat: return NF_ACCEPT; } +static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum) +{ + switch (pf) { +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + case NFPROTO_BRIDGE: + return rcu_dereference(net->nf.hooks_bridge[hooknum]); +#endif + case NFPROTO_IPV4: + return rcu_dereference(net->nf.hooks_ipv4[hooknum]); + case NFPROTO_IPV6: + return rcu_dereference(net->nf.hooks_ipv6[hooknum]); + default: + WARN_ON_ONCE(1); + return NULL; + } + + return NULL; +} + /* Caller must hold rcu read-side lock */ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { const struct nf_hook_entry *hook_entry; const struct nf_hook_entries *hooks; struct sk_buff *skb = entry->skb; - const struct nf_afinfo *afinfo; const struct net *net; unsigned int i; int err; @@ -219,12 +282,12 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) net = entry->state.net; pf = entry->state.pf; - hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]); + hooks = nf_hook_entries_head(net, pf, entry->state.hook); nf_queue_entry_release_refs(entry); i = entry->hook_index; - if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) { + if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { kfree_skb(skb); kfree(entry); return; @@ -237,8 +300,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); if (verdict == NF_ACCEPT) { - afinfo = nf_get_afinfo(entry->state.pf); - if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0) + if (nf_reroute(skb, entry) < 0) verdict = NF_DROP; } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 49bd8bb16b18..92139a087260 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -317,7 +317,6 @@ static int synproxy_cpu_seq_open(struct inode *inode, struct file *file) } static const struct file_operations synproxy_cpu_seq_fops = { - .owner = THIS_MODULE, .open = synproxy_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 07bd4138c84e..0791813a1e7d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -17,6 +17,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/net_namespace.h> @@ -24,86 +25,20 @@ static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); - -/** - * nft_register_afinfo - register nf_tables address family info - * - * @afi: address family info to register - * - * Register the address family for use with nf_tables. Returns zero on - * success or a negative errno code otherwise. - */ -int nft_register_afinfo(struct net *net, struct nft_af_info *afi) -{ - INIT_LIST_HEAD(&afi->tables); - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail_rcu(&afi->list, &net->nft.af_info); - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - return 0; -} -EXPORT_SYMBOL_GPL(nft_register_afinfo); - -static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi); - -/** - * nft_unregister_afinfo - unregister nf_tables address family info - * - * @afi: address family info to unregister - * - * Unregister the address family for use with nf_tables. - */ -void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi) -{ - nfnl_lock(NFNL_SUBSYS_NFTABLES); - __nft_release_afinfo(net, afi); - list_del_rcu(&afi->list); - nfnl_unlock(NFNL_SUBSYS_NFTABLES); -} -EXPORT_SYMBOL_GPL(nft_unregister_afinfo); - -static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family) -{ - struct nft_af_info *afi; - - list_for_each_entry(afi, &net->nft.af_info, list) { - if (afi->family == family) - return afi; - } - return NULL; -} - -static struct nft_af_info * -nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) -{ - struct nft_af_info *afi; - - afi = nft_afinfo_lookup(net, family); - if (afi != NULL) - return afi; -#ifdef CONFIG_MODULES - if (autoload) { - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - request_module("nft-afinfo-%u", family); - nfnl_lock(NFNL_SUBSYS_NFTABLES); - afi = nft_afinfo_lookup(net, family); - if (afi != NULL) - return ERR_PTR(-EAGAIN); - } -#endif - return ERR_PTR(-EAFNOSUPPORT); -} +static LIST_HEAD(nf_tables_flowtables); +static u64 table_handle; static void nft_ctx_init(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - struct nft_af_info *afi, + u8 family, struct nft_table *table, struct nft_chain *chain, const struct nlattr * const *nla) { ctx->net = net; - ctx->afi = afi; + ctx->family = family; ctx->table = table; ctx->chain = chain; ctx->nla = nla; @@ -139,29 +74,26 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -static int nf_tables_register_hooks(struct net *net, - const struct nft_table *table, - struct nft_chain *chain, - unsigned int hook_nops) +static int nf_tables_register_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain) { if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return 0; - return nf_register_net_hooks(net, nft_base_chain(chain)->ops, - hook_nops); + return nf_register_net_hook(net, &nft_base_chain(chain)->ops); } -static void nf_tables_unregister_hooks(struct net *net, - const struct nft_table *table, - struct nft_chain *chain, - unsigned int hook_nops) +static void nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain) { if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return; - nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops); + nf_unregister_net_hook(net, &nft_base_chain(chain)->ops); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -348,34 +280,99 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } +static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, + sizeof(struct nft_trans_flowtable)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWFLOWTABLE) + nft_activate_next(ctx->net, flowtable); + + nft_trans_flowtable(trans) = flowtable; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + +static int nft_delflowtable(struct nft_ctx *ctx, + struct nft_flowtable *flowtable) +{ + int err; + + err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (err < 0) + return err; + + nft_deactivate_next(ctx->net, flowtable); + ctx->table->use--; + + return err; +} + /* * Tables */ -static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, +static struct nft_table *nft_table_lookup(const struct net *net, const struct nlattr *nla, - u8 genmask) + u8 family, u8 genmask) { struct nft_table *table; - list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(table, &net->nft.tables, list) { if (!nla_strcmp(nla, table->name) && + table->family == family && nft_active_genmask(table, genmask)) return table; } return NULL; } -static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, +static struct nft_table *nft_table_lookup_byhandle(const struct net *net, + const struct nlattr *nla, + u8 genmask) +{ + struct nft_table *table; + + list_for_each_entry(table, &net->nft.tables, list) { + if (be64_to_cpu(nla_get_be64(nla)) == table->handle && + nft_active_genmask(table, genmask)) + return table; + } + return NULL; +} + +static struct nft_table *nf_tables_table_lookup(const struct net *net, const struct nlattr *nla, - u8 genmask) + u8 family, u8 genmask) +{ + struct nft_table *table; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + table = nft_table_lookup(net, nla, family, genmask); + if (table != NULL) + return table; + + return ERR_PTR(-ENOENT); +} + +static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net, + const struct nlattr *nla, + u8 genmask) { struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - table = nft_table_lookup(afi, nla, genmask); + table = nft_table_lookup_byhandle(net, nla, genmask); if (table != NULL) return table; @@ -390,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX]; static const struct nf_chain_type * -__nf_tables_chain_type_lookup(int family, const struct nlattr *nla) +__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) { int i; @@ -403,22 +400,20 @@ __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) } static const struct nf_chain_type * -nf_tables_chain_type_lookup(const struct nft_af_info *afi, - const struct nlattr *nla, - bool autoload) +nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload) { const struct nf_chain_type *type; - type = __nf_tables_chain_type_lookup(afi->family, nla); + type = __nf_tables_chain_type_lookup(nla, family); if (type != NULL) return type; #ifdef CONFIG_MODULES if (autoload) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); - request_module("nft-chain-%u-%.*s", afi->family, + request_module("nft-chain-%u-%.*s", family, nla_len(nla), (const char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); - type = __nf_tables_chain_type_lookup(afi->family, nla); + type = __nf_tables_chain_type_lookup(nla, family); if (type != NULL) return ERR_PTR(-EAGAIN); } @@ -430,6 +425,7 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, + [NFTA_TABLE_HANDLE] = { .type = NLA_U64 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, @@ -451,7 +447,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || - nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) + nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || + nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), + NFTA_TABLE_PAD)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -476,7 +474,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->afi->family, ctx->table); + event, 0, ctx->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; @@ -493,7 +491,6 @@ static int nf_tables_dump_tables(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_af_info *afi; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); @@ -502,30 +499,27 @@ static int nf_tables_dump_tables(struct sk_buff *skb, rcu_read_lock(); cb->seq = net->nft.base_seq; - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (family != NFPROTO_UNSPEC && family != afi->family) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (family != NFPROTO_UNSPEC && family != table->family) continue; - list_for_each_entry_rcu(table, &afi->tables, list) { - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (!nft_is_active(net, table)) - continue; - if (nf_tables_fill_table_info(skb, net, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWTABLE, - NLM_F_MULTI, - afi->family, table) < 0) - goto done; - - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (!nft_is_active(net, table)) + continue; + if (nf_tables_fill_table_info(skb, net, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWTABLE, NLM_F_MULTI, + table->family, table) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } + idx++; } done: rcu_read_unlock(); @@ -540,7 +534,6 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); - const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; int family = nfmsg->nfgen_family; @@ -553,11 +546,8 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -578,10 +568,7 @@ err: return err; } -static void _nf_tables_table_disable(struct net *net, - const struct nft_af_info *afi, - struct nft_table *table, - u32 cnt) +static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt) { struct nft_chain *chain; u32 i = 0; @@ -595,14 +582,11 @@ static void _nf_tables_table_disable(struct net *net, if (cnt && i++ == cnt) break; - nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, - afi->nops); + nf_unregister_net_hook(net, &nft_base_chain(chain)->ops); } } -static int nf_tables_table_enable(struct net *net, - const struct nft_af_info *afi, - struct nft_table *table) +static int nf_tables_table_enable(struct net *net, struct nft_table *table) { struct nft_chain *chain; int err, i = 0; @@ -613,8 +597,7 @@ static int nf_tables_table_enable(struct net *net, if (!nft_is_base_chain(chain)) continue; - err = nf_register_net_hooks(net, nft_base_chain(chain)->ops, - afi->nops); + err = nf_register_net_hook(net, &nft_base_chain(chain)->ops); if (err < 0) goto err; @@ -623,15 +606,13 @@ static int nf_tables_table_enable(struct net *net, return 0; err: if (i) - _nf_tables_table_disable(net, afi, table, i); + nft_table_disable(net, table, i); return err; } -static void nf_tables_table_disable(struct net *net, - const struct nft_af_info *afi, - struct nft_table *table) +static void nf_tables_table_disable(struct net *net, struct nft_table *table) { - _nf_tables_table_disable(net, afi, table, 0); + nft_table_disable(net, table, 0); } static int nf_tables_updtable(struct nft_ctx *ctx) @@ -660,7 +641,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table); + ret = nf_tables_table_enable(ctx->net, ctx->table); if (ret >= 0) { ctx->table->flags &= ~NFT_TABLE_F_DORMANT; nft_trans_table_enable(trans) = true; @@ -685,19 +666,14 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); const struct nlattr *name; - struct nft_af_info *afi; struct nft_table *table; int family = nfmsg->nfgen_family; u32 flags = 0; struct nft_ctx ctx; int err; - afi = nf_tables_afinfo_lookup(net, family, true); - if (IS_ERR(afi)) - return PTR_ERR(afi); - name = nla[NFTA_TABLE_NAME]; - table = nf_tables_table_lookup(afi, name, genmask); + table = nf_tables_table_lookup(net, name, family, genmask); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); @@ -707,7 +683,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); return nf_tables_updtable(&ctx); } @@ -717,47 +693,45 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, return -EINVAL; } - err = -EAFNOSUPPORT; - if (!try_module_get(afi->owner)) - goto err1; - err = -ENOMEM; table = kzalloc(sizeof(*table), GFP_KERNEL); if (table == NULL) - goto err2; + goto err_kzalloc; table->name = nla_strdup(name, GFP_KERNEL); if (table->name == NULL) - goto err3; + goto err_strdup; INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); + INIT_LIST_HEAD(&table->flowtables); + table->family = family; table->flags = flags; + table->handle = ++table_handle; - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); if (err < 0) - goto err4; + goto err_trans; - list_add_tail_rcu(&table->list, &afi->tables); + list_add_tail_rcu(&table->list, &net->nft.tables); return 0; -err4: +err_trans: kfree(table->name); -err3: +err_strdup: kfree(table); -err2: - module_put(afi->owner); -err1: +err_kzalloc: return err; } static int nft_flush_table(struct nft_ctx *ctx) { - int err; + struct nft_flowtable *flowtable, *nft; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; struct nft_set *set, *ns; + int err; list_for_each_entry(chain, &ctx->table->chains, list) { if (!nft_is_active_next(ctx->net, chain)) @@ -774,7 +748,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (set->flags & NFT_SET_ANONYMOUS && + if (nft_set_is_anonymous(set) && !list_empty(&set->bindings)) continue; @@ -783,6 +757,12 @@ static int nft_flush_table(struct nft_ctx *ctx) goto out; } + list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) { + err = nft_delflowtable(ctx, flowtable); + if (err < 0) + goto out; + } + list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) { err = nft_delobj(ctx, obj); if (err < 0) @@ -807,30 +787,28 @@ out: static int nft_flush(struct nft_ctx *ctx, int family) { - struct nft_af_info *afi; struct nft_table *table, *nt; const struct nlattr * const *nla = ctx->nla; int err = 0; - list_for_each_entry(afi, &ctx->net->nft.af_info, list) { - if (family != AF_UNSPEC && afi->family != family) + list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) { + if (family != AF_UNSPEC && table->family != family) continue; - ctx->afi = afi; - list_for_each_entry_safe(table, nt, &afi->tables, list) { - if (!nft_is_active_next(ctx->net, table)) - continue; + ctx->family = table->family; - if (nla[NFTA_TABLE_NAME] && - nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) - continue; + if (!nft_is_active_next(ctx->net, table)) + continue; - ctx->table = table; + if (nla[NFTA_TABLE_NAME] && + nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) + continue; - err = nft_flush_table(ctx); - if (err < 0) - goto out; - } + ctx->table = table; + + err = nft_flush_table(ctx); + if (err < 0) + goto out; } out: return err; @@ -843,20 +821,23 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); - struct nft_af_info *afi; struct nft_table *table; int family = nfmsg->nfgen_family; struct nft_ctx ctx; - nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla); - if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL) + nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla); + if (family == AF_UNSPEC || + (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE])) return nft_flush(&ctx, family); - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); + if (nla[NFTA_TABLE_HANDLE]) + table = nf_tables_table_lookup_byhandle(net, + nla[NFTA_TABLE_HANDLE], + genmask); + else + table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], + family, genmask); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -864,7 +845,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, table->use > 0) return -EBUSY; - ctx.afi = afi; + ctx.family = family; ctx.table = table; return nft_flush_table(&ctx); @@ -876,7 +857,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) kfree(ctx->table->name); kfree(ctx->table); - module_put(ctx->afi->owner); } int nft_register_chain_type(const struct nf_chain_type *ctype) @@ -1026,7 +1006,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; struct nlattr *nest; nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); @@ -1077,7 +1057,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->afi->family, ctx->table, + event, 0, ctx->family, ctx->table, ctx->chain); if (err < 0) { kfree_skb(skb); @@ -1095,7 +1075,6 @@ static int nf_tables_dump_chains(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0, s_idx = cb->args[0]; @@ -1105,31 +1084,30 @@ static int nf_tables_dump_chains(struct sk_buff *skb, rcu_read_lock(); cb->seq = net->nft.base_seq; - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (family != NFPROTO_UNSPEC && family != afi->family) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (family != NFPROTO_UNSPEC && family != table->family) continue; - list_for_each_entry_rcu(table, &afi->tables, list) { - list_for_each_entry_rcu(chain, &table->chains, list) { - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (!nft_is_active(net, chain)) - continue; - if (nf_tables_fill_chain_info(skb, net, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWCHAIN, - NLM_F_MULTI, - afi->family, table, chain) < 0) - goto done; + list_for_each_entry_rcu(chain, &table->chains, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (!nft_is_active(net, chain)) + continue; + if (nf_tables_fill_chain_info(skb, net, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWCHAIN, + NLM_F_MULTI, + table->family, table, + chain) < 0) + goto done; - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } + idx++; } } done: @@ -1145,7 +1123,6 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); - const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; struct sk_buff *skb2; @@ -1159,11 +1136,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -1227,13 +1201,13 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) static void nft_chain_stats_replace(struct nft_base_chain *chain, struct nft_stats __percpu *newstats) { + struct nft_stats __percpu *oldstats; + if (newstats == NULL) return; if (chain->stats) { - struct nft_stats __percpu *oldstats = - nft_dereference(chain->stats); - + oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES); rcu_assign_pointer(chain->stats, newstats); synchronize_rcu(); free_percpu(oldstats); @@ -1252,8 +1226,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) free_percpu(basechain->stats); if (basechain->stats) static_branch_dec(&nft_counters_enabled); - if (basechain->ops[0].dev != NULL) - dev_put(basechain->ops[0].dev); + if (basechain->ops.dev != NULL) + dev_put(basechain->ops.dev); kfree(chain->name); kfree(basechain); } else { @@ -1264,15 +1238,15 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) struct nft_chain_hook { u32 num; - u32 priority; + s32 priority; const struct nf_chain_type *type; struct net_device *dev; }; static int nft_chain_parse_hook(struct net *net, const struct nlattr * const nla[], - struct nft_af_info *afi, - struct nft_chain_hook *hook, bool create) + struct nft_chain_hook *hook, u8 family, + bool create) { struct nlattr *ha[NFTA_HOOK_MAX + 1]; const struct nf_chain_type *type; @@ -1289,27 +1263,29 @@ static int nft_chain_parse_hook(struct net *net, return -EINVAL; hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); - if (hook->num >= afi->nhooks) - return -EINVAL; - hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT]; + type = chain_type[family][NFT_CHAIN_T_DEFAULT]; if (nla[NFTA_CHAIN_TYPE]) { - type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE], - create); + type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE], + family, create); if (IS_ERR(type)) return PTR_ERR(type); } if (!(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; + + if (type->type == NFT_CHAIN_T_NAT && + hook->priority <= NF_IP_PRI_CONNTRACK) + return -EOPNOTSUPP; + if (!try_module_get(type->owner)) return -ENOENT; hook->type = type; hook->dev = NULL; - if (afi->flags & NFT_AF_NEEDS_DEV) { + if (family == NFPROTO_NETDEV) { char ifname[IFNAMSIZ]; if (!ha[NFTA_HOOK_DEV]) { @@ -1344,12 +1320,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, { const struct nlattr * const *nla = ctx->nla; struct nft_table *table = ctx->table; - struct nft_af_info *afi = ctx->afi; struct nft_base_chain *basechain; struct nft_stats __percpu *stats; struct net *net = ctx->net; struct nft_chain *chain; - unsigned int i; int err; if (table->use == UINT_MAX) @@ -1358,9 +1332,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nla[NFTA_CHAIN_HOOK]) { struct nft_chain_hook hook; struct nf_hook_ops *ops; - nf_hookfn *hookfn; - err = nft_chain_parse_hook(net, nla, afi, &hook, create); + err = nft_chain_parse_hook(net, nla, &hook, family, create); if (err < 0) return err; @@ -1384,23 +1357,19 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, static_branch_inc(&nft_counters_enabled); } - hookfn = hook.type->hooks[hook.num]; basechain->type = hook.type; chain = &basechain->chain; - for (i = 0; i < afi->nops; i++) { - ops = &basechain->ops[i]; - ops->pf = family; - ops->hooknum = hook.num; - ops->priority = hook.priority; - ops->priv = chain; - ops->hook = afi->hooks[ops->hooknum]; - ops->dev = hook.dev; - if (hookfn) - ops->hook = hookfn; - if (afi->hook_ops_init) - afi->hook_ops_init(ops, i); - } + ops = &basechain->ops; + ops->pf = family; + ops->hooknum = hook.num; + ops->priority = hook.priority; + ops->priv = chain; + ops->hook = hook.type->hooks[ops->hooknum]; + ops->dev = hook.dev; + + if (basechain->type->type == NFT_CHAIN_T_NAT) + ops->nat_hook = true; chain->flags |= NFT_BASE_CHAIN; basechain->policy = policy; @@ -1418,7 +1387,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, goto err1; } - err = nf_tables_register_hooks(net, table, chain, afi->nops); + err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err1; @@ -1432,7 +1401,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, return 0; err2: - nf_tables_unregister_hooks(net, table, chain, afi->nops); + nf_tables_unregister_hook(net, table, chain); err1: nf_tables_chain_destroy(chain); @@ -1445,20 +1414,19 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, const struct nlattr * const *nla = ctx->nla; struct nft_table *table = ctx->table; struct nft_chain *chain = ctx->chain; - struct nft_af_info *afi = ctx->afi; struct nft_base_chain *basechain; struct nft_stats *stats = NULL; struct nft_chain_hook hook; const struct nlattr *name; struct nf_hook_ops *ops; struct nft_trans *trans; - int err, i; + int err; if (nla[NFTA_CHAIN_HOOK]) { if (!nft_is_base_chain(chain)) return -EBUSY; - err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook, + err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family, create); if (err < 0) return err; @@ -1469,14 +1437,12 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, return -EBUSY; } - for (i = 0; i < afi->nops; i++) { - ops = &basechain->ops[i]; - if (ops->hooknum != hook.num || - ops->priority != hook.priority || - ops->dev != hook.dev) { - nft_chain_release_hook(&hook); - return -EBUSY; - } + ops = &basechain->ops; + if (ops->hooknum != hook.num || + ops->priority != hook.priority || + ops->dev != hook.dev) { + nft_chain_release_hook(&hook); + return -EBUSY; } nft_chain_release_hook(&hook); } @@ -1539,7 +1505,6 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, const struct nlattr * uninitialized_var(name); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; u8 policy = NF_ACCEPT; @@ -1549,11 +1514,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(net, family, true); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -1593,7 +1555,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, } } - nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); if (chain != NULL) { if (nlh->nlmsg_flags & NLM_F_EXCL) @@ -1614,24 +1576,26 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); - struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule; int family = nfmsg->nfgen_family; struct nft_ctx ctx; + u64 handle; u32 use; int err; - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); + if (nla[NFTA_CHAIN_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); + chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); + } else { + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); + } if (IS_ERR(chain)) return PTR_ERR(chain); @@ -1639,7 +1603,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, chain->use > 0) return -EBUSY; - nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); use = chain->use; list_for_each_entry(rule, &chain->rules, list) { @@ -1804,7 +1768,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, if (err < 0) return err; - type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]); + type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); if (IS_ERR(type)) return PTR_ERR(type); @@ -2027,7 +1991,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->afi->family, ctx->table, + event, 0, ctx->family, ctx->table, ctx->chain, rule); if (err < 0) { kfree_skb(skb); @@ -2051,7 +2015,6 @@ static int nf_tables_dump_rules(struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); const struct nft_rule_dump_ctx *ctx = cb->data; - const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; const struct nft_rule *rule; @@ -2062,39 +2025,37 @@ static int nf_tables_dump_rules(struct sk_buff *skb, rcu_read_lock(); cb->seq = net->nft.base_seq; - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (family != NFPROTO_UNSPEC && family != afi->family) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (family != NFPROTO_UNSPEC && family != table->family) continue; - list_for_each_entry_rcu(table, &afi->tables, list) { - if (ctx && ctx->table && - strcmp(ctx->table, table->name) != 0) + if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) + continue; + + list_for_each_entry_rcu(chain, &table->chains, list) { + if (ctx && ctx->chain && + strcmp(ctx->chain, chain->name) != 0) continue; - list_for_each_entry_rcu(chain, &table->chains, list) { - if (ctx && ctx->chain && - strcmp(ctx->chain, chain->name) != 0) - continue; - - list_for_each_entry_rcu(rule, &chain->rules, list) { - if (!nft_is_active(net, rule)) - goto cont; - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWRULE, - NLM_F_MULTI | NLM_F_APPEND, - afi->family, table, chain, rule) < 0) - goto done; - - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + list_for_each_entry_rcu(rule, &chain->rules, list) { + if (!nft_is_active(net, rule)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, + table->family, + table, chain, rule) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } + idx++; } } } @@ -2124,7 +2085,6 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); - const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; const struct nft_rule *rule; @@ -2168,11 +2128,8 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -2229,7 +2186,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); - struct nft_af_info *afi; + int family = nfmsg->nfgen_family; struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; @@ -2245,11 +2202,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -2288,7 +2242,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return PTR_ERR(old_rule); } - nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); n = 0; size = 0; @@ -2412,18 +2366,14 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); - struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain = NULL; struct nft_rule *rule; int family = nfmsg->nfgen_family, err = 0; struct nft_ctx ctx; - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -2434,7 +2384,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, return PTR_ERR(chain); } - nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); if (chain) { if (nla[NFTA_RULE_HANDLE]) { @@ -2601,6 +2551,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_HANDLE] = { .type = NLA_U64 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2614,26 +2565,17 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct nft_af_info *afi = NULL; + int family = nfmsg->nfgen_family; struct nft_table *table = NULL; - if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - } - if (nla[NFTA_SET_TABLE] != NULL) { - if (afi == NULL) - return -EAFNOSUPPORT; - - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], - genmask); + table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], + family, genmask); if (IS_ERR(table)) return PTR_ERR(table); } - nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); return 0; } @@ -2653,6 +2595,22 @@ static struct nft_set *nf_tables_set_lookup(const struct nft_table *table, return ERR_PTR(-ENOENT); } +static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table, + const struct nlattr *nla, u8 genmask) +{ + struct nft_set *set; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(set, &table->sets, list) { + if (be64_to_cpu(nla_get_be64(nla)) == set->handle && + nft_active_genmask(set, genmask)) + return set; + } + return ERR_PTR(-ENOENT); +} + static struct nft_set *nf_tables_set_lookup_byid(const struct net *net, const struct nlattr *nla, u8 genmask) @@ -2760,7 +2718,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->afi->family; + nfmsg->nfgen_family = ctx->family; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); @@ -2768,6 +2726,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) goto nla_put_failure; + if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle), + NFTA_SET_PAD)) + goto nla_put_failure; if (set->flags != 0) if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) goto nla_put_failure; @@ -2852,10 +2813,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) { const struct nft_set *set; unsigned int idx, s_idx = cb->args[0]; - struct nft_af_info *afi; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); - int cur_family = cb->args[3]; struct nft_ctx *ctx = cb->data, ctx_set; if (cb->args[1]) @@ -2864,51 +2823,44 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->nft.base_seq; - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (ctx->afi && ctx->afi != afi) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (ctx->family != NFPROTO_UNSPEC && + ctx->family != table->family) continue; - if (cur_family) { - if (afi->family != cur_family) + if (ctx->table && ctx->table != table) + continue; + + if (cur_table) { + if (cur_table != table) continue; - cur_family = 0; + cur_table = NULL; } - list_for_each_entry_rcu(table, &afi->tables, list) { - if (ctx->table && ctx->table != table) - continue; + idx = 0; + list_for_each_entry_rcu(set, &table->sets, list) { + if (idx < s_idx) + goto cont; + if (!nft_is_active(net, set)) + goto cont; - if (cur_table) { - if (cur_table != table) - continue; + ctx_set = *ctx; + ctx_set.table = table; + ctx_set.family = table->family; - cur_table = NULL; + if (nf_tables_fill_set(skb, &ctx_set, set, + NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + goto done; } - idx = 0; - list_for_each_entry_rcu(set, &table->sets, list) { - if (idx < s_idx) - goto cont; - if (!nft_is_active(net, set)) - goto cont; - - ctx_set = *ctx; - ctx_set.table = table; - ctx_set.afi = afi; - if (nf_tables_fill_set(skb, &ctx_set, set, - NFT_MSG_NEWSET, - NLM_F_MULTI) < 0) { - cb->args[0] = idx; - cb->args[2] = (unsigned long) table; - cb->args[3] = afi->family; - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } - if (s_idx) - s_idx = 0; + idx++; } + if (s_idx) + s_idx = 0; } cb->args[1] = 1; done: @@ -3006,8 +2958,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; const struct nft_set_ops *ops; - struct nft_af_info *afi; struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; @@ -3114,15 +3066,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { @@ -3188,6 +3137,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, set->udata = udata; set->timeout = timeout; set->gc_int = gc_int; + set->handle = nf_tables_alloc_handle(table); err = ops->init(set, &desc, nla); if (err < 0) @@ -3245,7 +3195,10 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); + if (nla[NFTA_SET_HANDLE]) + set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask); + else + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); @@ -3277,7 +3230,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; - if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; if (binding->flags & NFT_SET_MAP) { @@ -3312,7 +3265,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, { list_del_rcu(&binding->list); - if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && + if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && nft_is_active(ctx->net, set)) nf_tables_set_destroy(ctx, set); } @@ -3380,19 +3333,15 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct nft_af_info *afi; + int family = nfmsg->nfgen_family; struct nft_table *table; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], - genmask); + table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], + family, genmask); if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); return 0; } @@ -3497,7 +3446,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); - struct nft_af_info *afi; struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; @@ -3509,21 +3457,19 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) int event; rcu_read_lock(); - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (afi != dump_ctx->ctx.afi) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (dump_ctx->ctx.family != NFPROTO_UNSPEC && + dump_ctx->ctx.family != table->family) continue; - list_for_each_entry_rcu(table, &afi->tables, list) { - if (table != dump_ctx->ctx.table) - continue; + if (table != dump_ctx->ctx.table) + continue; - list_for_each_entry_rcu(set, &table->sets, list) { - if (set == dump_ctx->set) { - set_found = true; - break; - } + list_for_each_entry_rcu(set, &table->sets, list) { + if (set == dump_ctx->set) { + set_found = true; + break; } - break; } break; } @@ -3543,7 +3489,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = afi->family; + nfmsg->nfgen_family = table->family; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(net->nft.base_seq & 0xffff); @@ -3606,7 +3552,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->afi->family; + nfmsg->nfgen_family = ctx->family; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); @@ -3963,7 +3909,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { .net = ctx->net, - .afi = ctx->afi, + .family = ctx->family, .table = ctx->table, .chain = (struct nft_chain *)binding->chain, }; @@ -4382,6 +4328,21 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, } EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); +struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table, + const struct nlattr *nla, + u32 objtype, u8 genmask) +{ + struct nft_object *obj; + + list_for_each_entry(obj, &table->objects, list) { + if (be64_to_cpu(nla_get_be64(nla)) == obj->handle && + objtype == obj->ops->type->type && + nft_active_genmask(obj, genmask)) + return obj; + } + return ERR_PTR(-ENOENT); +} + static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { [NFTA_OBJ_TABLE] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, @@ -4389,6 +4350,7 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, + [NFTA_OBJ_HANDLE] = { .type = NLA_U64}, }; static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, @@ -4494,7 +4456,6 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, const struct nft_object_type *type; u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct nft_af_info *afi; struct nft_table *table; struct nft_object *obj; struct nft_ctx ctx; @@ -4506,11 +4467,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, !nla[NFTA_OBJ_DATA]) return -EINVAL; - afi = nf_tables_afinfo_lookup(net, family, true); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -4528,7 +4486,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, return 0; } - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); type = nft_obj_type_get(objtype); if (IS_ERR(type)) @@ -4540,6 +4498,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, goto err1; } obj->table = table; + obj->handle = nf_tables_alloc_handle(table); + obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); if (!obj->name) { err = -ENOMEM; @@ -4586,7 +4546,9 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || - nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) + nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) || + nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle), + NFTA_OBJ_PAD)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -4605,7 +4567,6 @@ struct nft_obj_filter { static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_af_info *afi; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct nft_obj_filter *filter = cb->data; @@ -4620,38 +4581,37 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->nft.base_seq; - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (family != NFPROTO_UNSPEC && family != afi->family) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (family != NFPROTO_UNSPEC && family != table->family) continue; - list_for_each_entry_rcu(table, &afi->tables, list) { - list_for_each_entry_rcu(obj, &table->objects, list) { - if (!nft_is_active(net, obj)) - goto cont; - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (filter && filter->table[0] && - strcmp(filter->table, table->name)) - goto cont; - if (filter && - filter->type != NFT_OBJECT_UNSPEC && - obj->ops->type->type != filter->type) - goto cont; + list_for_each_entry_rcu(obj, &table->objects, list) { + if (!nft_is_active(net, obj)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (filter && filter->table[0] && + strcmp(filter->table, table->name)) + goto cont; + if (filter && + filter->type != NFT_OBJECT_UNSPEC && + obj->ops->type->type != filter->type) + goto cont; - if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWOBJ, - NLM_F_MULTI | NLM_F_APPEND, - afi->family, table, obj, reset) < 0) - goto done; + if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWOBJ, + NLM_F_MULTI | NLM_F_APPEND, + table->family, table, + obj, reset) < 0) + goto done; - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } + idx++; } } done: @@ -4703,7 +4663,6 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); int family = nfmsg->nfgen_family; - const struct nft_af_info *afi; const struct nft_table *table; struct nft_object *obj; struct sk_buff *skb2; @@ -4734,11 +4693,8 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, !nla[NFTA_OBJ_TYPE]) return -EINVAL; - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -4784,32 +4740,33 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct nft_af_info *afi; struct nft_table *table; struct nft_object *obj; struct nft_ctx ctx; u32 objtype; if (!nla[NFTA_OBJ_TYPE] || - !nla[NFTA_OBJ_NAME]) + (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE])) return -EINVAL; - afi = nf_tables_afinfo_lookup(net, family, true); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (nla[NFTA_OBJ_HANDLE]) + obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE], + objtype, genmask); + else + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], + objtype, genmask); if (IS_ERR(obj)) return PTR_ERR(obj); if (obj->use > 0) return -EBUSY; - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); return nft_delobj(&ctx, obj); } @@ -4847,7 +4804,613 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx, struct nft_object *obj, int event) { nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, - ctx->afi->family, ctx->report, GFP_KERNEL); + ctx->family, ctx->report, GFP_KERNEL); +} + +/* + * Flow tables + */ +void nft_register_flowtable_type(struct nf_flowtable_type *type) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail_rcu(&type->list, &nf_tables_flowtables); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_register_flowtable_type); + +void nft_unregister_flowtable_type(struct nf_flowtable_type *type) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del_rcu(&type->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type); + +static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = { + [NFTA_FLOWTABLE_TABLE] = { .type = NLA_STRING, + .len = NFT_NAME_MAXLEN - 1 }, + [NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING, + .len = NFT_NAME_MAXLEN - 1 }, + [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED }, + [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 }, +}; + +struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, + const struct nlattr *nla, + u8 genmask) +{ + struct nft_flowtable *flowtable; + + list_for_each_entry(flowtable, &table->flowtables, list) { + if (!nla_strcmp(nla, flowtable->name) && + nft_active_genmask(flowtable, genmask)) + return flowtable; + } + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup); + +struct nft_flowtable * +nf_tables_flowtable_lookup_byhandle(const struct nft_table *table, + const struct nlattr *nla, u8 genmask) +{ + struct nft_flowtable *flowtable; + + list_for_each_entry(flowtable, &table->flowtables, list) { + if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle && + nft_active_genmask(flowtable, genmask)) + return flowtable; + } + return ERR_PTR(-ENOENT); +} + +#define NFT_FLOWTABLE_DEVICE_MAX 8 + +static int nf_tables_parse_devices(const struct nft_ctx *ctx, + const struct nlattr *attr, + struct net_device *dev_array[], int *len) +{ + const struct nlattr *tmp; + struct net_device *dev; + char ifname[IFNAMSIZ]; + int rem, n = 0, err; + + nla_for_each_nested(tmp, attr, rem) { + if (nla_type(tmp) != NFTA_DEVICE_NAME) { + err = -EINVAL; + goto err1; + } + + nla_strlcpy(ifname, tmp, IFNAMSIZ); + dev = dev_get_by_name(ctx->net, ifname); + if (!dev) { + err = -ENOENT; + goto err1; + } + + dev_array[n++] = dev; + if (n == NFT_FLOWTABLE_DEVICE_MAX) { + err = -EFBIG; + goto err1; + } + } + if (!len) + return -EINVAL; + + err = 0; +err1: + *len = n; + return err; +} + +static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = { + [NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 }, + [NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 }, + [NFTA_FLOWTABLE_HOOK_DEVS] = { .type = NLA_NESTED }, +}; + +static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx, + const struct nlattr *attr, + struct nft_flowtable *flowtable) +{ + struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX]; + struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1]; + struct nf_hook_ops *ops; + int hooknum, priority; + int err, n = 0, i; + + err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr, + nft_flowtable_hook_policy, NULL); + if (err < 0) + return err; + + if (!tb[NFTA_FLOWTABLE_HOOK_NUM] || + !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] || + !tb[NFTA_FLOWTABLE_HOOK_DEVS]) + return -EINVAL; + + hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM])); + if (hooknum != NF_NETDEV_INGRESS) + return -EINVAL; + + priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY])); + + err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS], + dev_array, &n); + if (err < 0) + goto err1; + + ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL); + if (!ops) { + err = -ENOMEM; + goto err1; + } + + flowtable->hooknum = hooknum; + flowtable->priority = priority; + flowtable->ops = ops; + flowtable->ops_len = n; + + for (i = 0; i < n; i++) { + flowtable->ops[i].pf = NFPROTO_NETDEV; + flowtable->ops[i].hooknum = hooknum; + flowtable->ops[i].priority = priority; + flowtable->ops[i].priv = &flowtable->data.rhashtable; + flowtable->ops[i].hook = flowtable->data.type->hook; + flowtable->ops[i].dev = dev_array[i]; + } + + err = 0; +err1: + for (i = 0; i < n; i++) + dev_put(dev_array[i]); + + return err; +} + +static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) +{ + const struct nf_flowtable_type *type; + + list_for_each_entry(type, &nf_tables_flowtables, list) { + if (family == type->family) + return type; + } + return NULL; +} + +static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family) +{ + const struct nf_flowtable_type *type; + + type = __nft_flowtable_type_get(family); + if (type != NULL && try_module_get(type->owner)) + return type; + +#ifdef CONFIG_MODULES + if (type == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nf-flowtable-%u", family); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_flowtable_type_get(family)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +void nft_flow_table_iterate(struct net *net, + void (*iter)(struct nf_flowtable *flowtable, void *data), + void *data) +{ + struct nft_flowtable *flowtable; + const struct nft_table *table; + + rcu_read_lock(); + list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(flowtable, &table->flowtables, list) { + iter(&flowtable->data, data); + } + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nft_flow_table_iterate); + +static void nft_unregister_flowtable_net_hooks(struct net *net, + struct nft_flowtable *flowtable) +{ + int i; + + for (i = 0; i < flowtable->ops_len; i++) { + if (!flowtable->ops[i].dev) + continue; + + nf_unregister_net_hook(net, &flowtable->ops[i]); + } +} + +static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nf_flowtable_type *type; + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; + struct nft_table *table; + struct nft_ctx ctx; + int err, i, k; + + if (!nla[NFTA_FLOWTABLE_TABLE] || + !nla[NFTA_FLOWTABLE_NAME] || + !nla[NFTA_FLOWTABLE_HOOK]) + return -EINVAL; + + table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], + family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + genmask); + if (IS_ERR(flowtable)) { + err = PTR_ERR(flowtable); + if (err != -ENOENT) + return err; + } else { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + return 0; + } + + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); + if (!flowtable) + return -ENOMEM; + + flowtable->table = table; + flowtable->handle = nf_tables_alloc_handle(table); + + flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL); + if (!flowtable->name) { + err = -ENOMEM; + goto err1; + } + + type = nft_flowtable_type_get(family); + if (IS_ERR(type)) { + err = PTR_ERR(type); + goto err2; + } + + flowtable->data.type = type; + err = rhashtable_init(&flowtable->data.rhashtable, type->params); + if (err < 0) + goto err3; + + err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], + flowtable); + if (err < 0) + goto err3; + + for (i = 0; i < flowtable->ops_len; i++) { + err = nf_register_net_hook(net, &flowtable->ops[i]); + if (err < 0) + goto err4; + } + + err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (err < 0) + goto err5; + + INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc); + queue_delayed_work(system_power_efficient_wq, + &flowtable->data.gc_work, HZ); + + list_add_tail_rcu(&flowtable->list, &table->flowtables); + table->use++; + + return 0; +err5: + i = flowtable->ops_len; +err4: + for (k = i - 1; k >= 0; k--) + nf_unregister_net_hook(net, &flowtable->ops[i]); + + kfree(flowtable->ops); +err3: + module_put(type->owner); +err2: + kfree(flowtable->name); +err1: + kfree(flowtable); + return err; +} + +static int nf_tables_delflowtable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; + struct nft_table *table; + struct nft_ctx ctx; + + table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], + family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + if (nla[NFTA_FLOWTABLE_HANDLE]) + flowtable = nf_tables_flowtable_lookup_byhandle(table, + nla[NFTA_FLOWTABLE_HANDLE], + genmask); + else + flowtable = nf_tables_flowtable_lookup(table, + nla[NFTA_FLOWTABLE_NAME], + genmask); + if (IS_ERR(flowtable)) + return PTR_ERR(flowtable); + if (flowtable->use > 0) + return -EBUSY; + + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + + return nft_delflowtable(&ctx, flowtable); +} + +static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, + u32 flags, int family, + struct nft_flowtable *flowtable) +{ + struct nlattr *nest, *nest_devs; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + int i; + + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); + + if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) || + nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || + nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || + nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle), + NFTA_FLOWTABLE_PAD)) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); + if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || + nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) + goto nla_put_failure; + + nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS); + if (!nest_devs) + goto nla_put_failure; + + for (i = 0; i < flowtable->ops_len; i++) { + if (flowtable->ops[i].dev && + nla_put_string(skb, NFTA_DEVICE_NAME, + flowtable->ops[i].dev->name)) + goto nla_put_failure; + } + nla_nest_end(skb, nest_devs); + nla_nest_end(skb, nest); + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +struct nft_flowtable_filter { + char *table; +}; + +static int nf_tables_dump_flowtable(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nft_flowtable_filter *filter = cb->data; + unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; + const struct nft_table *table; + + rcu_read_lock(); + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (family != NFPROTO_UNSPEC && family != table->family) + continue; + + list_for_each_entry_rcu(flowtable, &table->flowtables, list) { + if (!nft_is_active(net, flowtable)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (filter && filter->table[0] && + strcmp(filter->table, table->name)) + goto cont; + + if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWFLOWTABLE, + NLM_F_MULTI | NLM_F_APPEND, + table->family, flowtable) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } +done: + rcu_read_unlock(); + + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_dump_flowtable_done(struct netlink_callback *cb) +{ + struct nft_flowtable_filter *filter = cb->data; + + if (!filter) + return 0; + + kfree(filter->table); + kfree(filter); + + return 0; +} + +static struct nft_flowtable_filter * +nft_flowtable_filter_alloc(const struct nlattr * const nla[]) +{ + struct nft_flowtable_filter *filter; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return ERR_PTR(-ENOMEM); + + if (nla[NFTA_FLOWTABLE_TABLE]) { + filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE], + GFP_KERNEL); + if (!filter->table) { + kfree(filter); + return ERR_PTR(-ENOMEM); + } + } + return filter; +} + +static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; + const struct nft_table *table; + struct sk_buff *skb2; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_flowtable, + .done = nf_tables_dump_flowtable_done, + }; + + if (nla[NFTA_FLOWTABLE_TABLE]) { + struct nft_flowtable_filter *filter; + + filter = nft_flowtable_filter_alloc(nla); + if (IS_ERR(filter)) + return -ENOMEM; + + c.data = filter; + } + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + if (!nla[NFTA_FLOWTABLE_NAME]) + return -EINVAL; + + table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], + family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + genmask); + if (IS_ERR(flowtable)) + return PTR_ERR(flowtable); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFT_MSG_NEWFLOWTABLE, 0, family, + flowtable); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); +err: + kfree_skb(skb2); + return err; +} + +static void nf_tables_flowtable_notify(struct nft_ctx *ctx, + struct nft_flowtable *flowtable, + int event) +{ + struct sk_buff *skb; + int err; + + if (ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) + return; + + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, + ctx->seq, event, 0, + ctx->family, flowtable); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; +err: + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); +} + +static void nft_flowtable_destroy(void *ptr, void *arg) +{ + kfree(ptr); +} + +static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) +{ + cancel_delayed_work_sync(&flowtable->data.gc_work); + kfree(flowtable->name); + rhashtable_free_and_destroy(&flowtable->data.rhashtable, + nft_flowtable_destroy, NULL); + module_put(flowtable->data.type->owner); } static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, @@ -4880,6 +5443,46 @@ nla_put_failure: return -EMSGSIZE; } +static void nft_flowtable_event(unsigned long event, struct net_device *dev, + struct nft_flowtable *flowtable) +{ + int i; + + for (i = 0; i < flowtable->ops_len; i++) { + if (flowtable->ops[i].dev != dev) + continue; + + nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]); + flowtable->ops[i].dev = NULL; + break; + } +} + +static int nf_tables_flowtable_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_flowtable *flowtable; + struct nft_table *table; + + if (event != NETDEV_UNREGISTER) + return 0; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_for_each_entry(table, &dev_net(dev)->nft.tables, list) { + list_for_each_entry(flowtable, &table->flowtables, list) { + nft_flowtable_event(event, dev, flowtable); + } + } + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + + return NOTIFY_DONE; +} + +static struct notifier_block nf_tables_flowtable_notifier = { + .notifier_call = nf_tables_flowtable_event, +}; + static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) { @@ -5032,6 +5635,21 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, + [NFT_MSG_NEWFLOWTABLE] = { + .call_batch = nf_tables_newflowtable, + .attr_count = NFTA_FLOWTABLE_MAX, + .policy = nft_flowtable_policy, + }, + [NFT_MSG_GETFLOWTABLE] = { + .call = nf_tables_getflowtable, + .attr_count = NFTA_FLOWTABLE_MAX, + .policy = nft_flowtable_policy, + }, + [NFT_MSG_DELFLOWTABLE] = { + .call_batch = nf_tables_delflowtable, + .attr_count = NFTA_FLOWTABLE_MAX, + .policy = nft_flowtable_policy, + }, }; static void nft_chain_commit_update(struct nft_trans *trans) @@ -5077,6 +5695,9 @@ static void nf_tables_commit_release(struct nft_trans *trans) case NFT_MSG_DELOBJ: nft_obj_destroy(nft_trans_obj(trans)); break; + case NFT_MSG_DELFLOWTABLE: + nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); + break; } kfree(trans); } @@ -5103,7 +5724,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_table_update(trans)) { if (!nft_trans_table_enable(trans)) { nf_tables_table_disable(net, - trans->ctx.afi, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } @@ -5129,10 +5749,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELCHAIN: list_del_rcu(&trans->ctx.chain->list); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); - nf_tables_unregister_hooks(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain, - trans->ctx.afi->nops); + nf_tables_unregister_hook(trans->ctx.net, + trans->ctx.table, + trans->ctx.chain); break; case NFT_MSG_NEWRULE: nft_clear(trans->ctx.net, nft_trans_rule(trans)); @@ -5152,7 +5771,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* This avoids hitting -EBUSY when deleting the table * from the transaction. */ - if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS && + if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) trans->ctx.table->use--; @@ -5195,6 +5814,21 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), NFT_MSG_DELOBJ); break; + case NFT_MSG_NEWFLOWTABLE: + nft_clear(net, nft_trans_flowtable(trans)); + nf_tables_flowtable_notify(&trans->ctx, + nft_trans_flowtable(trans), + NFT_MSG_NEWFLOWTABLE); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELFLOWTABLE: + list_del_rcu(&nft_trans_flowtable(trans)->list); + nf_tables_flowtable_notify(&trans->ctx, + nft_trans_flowtable(trans), + NFT_MSG_DELFLOWTABLE); + nft_unregister_flowtable_net_hooks(net, + nft_trans_flowtable(trans)); + break; } } @@ -5232,6 +5866,9 @@ static void nf_tables_abort_release(struct nft_trans *trans) case NFT_MSG_NEWOBJ: nft_obj_destroy(nft_trans_obj(trans)); break; + case NFT_MSG_NEWFLOWTABLE: + nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); + break; } kfree(trans); } @@ -5248,7 +5885,6 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) if (nft_trans_table_update(trans)) { if (nft_trans_table_enable(trans)) { nf_tables_table_disable(net, - trans->ctx.afi, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } @@ -5269,10 +5905,9 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) } else { trans->ctx.table->use--; list_del_rcu(&trans->ctx.chain->list); - nf_tables_unregister_hooks(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain, - trans->ctx.afi->nops); + nf_tables_unregister_hook(trans->ctx.net, + trans->ctx.table, + trans->ctx.chain); } break; case NFT_MSG_DELCHAIN: @@ -5322,6 +5957,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; + case NFT_MSG_NEWFLOWTABLE: + trans->ctx.table->use--; + list_del_rcu(&nft_trans_flowtable(trans)->list); + nft_unregister_flowtable_net_hooks(net, + nft_trans_flowtable(trans)); + break; + case NFT_MSG_DELFLOWTABLE: + trans->ctx.table->use++; + nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); + nft_trans_destroy(trans); + break; } } @@ -5373,7 +6019,7 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, if (nft_is_base_chain(chain)) { basechain = nft_base_chain(chain); - if ((1 << basechain->ops[0].hooknum) & hook_flags) + if ((1 << basechain->ops.hooknum) & hook_flags) return 0; return -EOPNOTSUPP; @@ -5841,28 +6487,13 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); -static int __net_init nf_tables_init_net(struct net *net) -{ - INIT_LIST_HEAD(&net->nft.af_info); - INIT_LIST_HEAD(&net->nft.commit_list); - net->nft.base_seq = 1; - return 0; -} - -static void __net_exit nf_tables_exit_net(struct net *net) -{ - WARN_ON_ONCE(!list_empty(&net->nft.af_info)); - WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); -} - int __nft_release_basechain(struct nft_ctx *ctx) { struct nft_rule *rule, *nr; BUG_ON(!nft_is_base_chain(ctx->chain)); - nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain, - ctx->afi->nops); + nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); ctx->chain->use--; @@ -5876,9 +6507,9 @@ int __nft_release_basechain(struct nft_ctx *ctx) } EXPORT_SYMBOL_GPL(__nft_release_basechain); -/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */ -static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) +static void __nft_release_tables(struct net *net) { + struct nft_flowtable *flowtable, *nf; struct nft_table *table, *nt; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; @@ -5886,13 +6517,16 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) struct nft_set *set, *ns; struct nft_ctx ctx = { .net = net, - .afi = afi, }; - list_for_each_entry_safe(table, nt, &afi->tables, list) { + list_for_each_entry_safe(table, nt, &net->nft.tables, list) { + ctx.family = table->family; + list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hooks(net, table, chain, - afi->nops); + nf_tables_unregister_hook(net, table, chain); + list_for_each_entry(flowtable, &table->flowtables, list) + nf_unregister_net_hooks(net, flowtable->ops, + flowtable->ops_len); /* No packets are walking on these chains anymore. */ ctx.table = table; list_for_each_entry(chain, &table->chains, list) { @@ -5903,6 +6537,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) nf_tables_rule_destroy(&ctx, rule); } } + list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { + list_del(&flowtable->list); + table->use--; + nf_tables_flowtable_destroy(flowtable); + } list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); table->use--; @@ -5923,6 +6562,21 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) } } +static int __net_init nf_tables_init_net(struct net *net) +{ + INIT_LIST_HEAD(&net->nft.tables); + INIT_LIST_HEAD(&net->nft.commit_list); + net->nft.base_seq = 1; + return 0; +} + +static void __net_exit nf_tables_exit_net(struct net *net) +{ + __nft_release_tables(net); + WARN_ON_ONCE(!list_empty(&net->nft.tables)); + WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); +} + static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .exit = nf_tables_exit_net, @@ -5947,7 +6601,8 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err3; - pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n"); + register_netdevice_notifier(&nf_tables_flowtable_notifier); + return register_pernet_subsys(&nf_tables_net_ops); err3: nf_tables_core_module_exit(); @@ -5961,6 +6616,7 @@ static void __exit nf_tables_module_exit(void) { unregister_pernet_subsys(&nf_tables_net_ops); nfnetlink_subsys_unregister(&nf_tables_subsys); + unregister_netdevice_notifier(&nf_tables_flowtable_notifier); rcu_barrier(); nf_tables_core_module_exit(); kfree(info); diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index f713cc205669..e30c7da09d0d 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -9,6 +9,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_tables.h> @@ -16,56 +17,27 @@ #include <net/netfilter/nf_tables_ipv6.h> #include <net/ip.h> -static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n) +static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { - struct nft_af_info *afi; - - if (n == 1) - afi = &nft_af_ipv4; - else - afi = &nft_af_ipv6; - - ops->pf = afi->family; - if (afi->hooks[ops->hooknum]) - ops->hook = afi->hooks[ops->hooknum]; -} - -static struct nft_af_info nft_af_inet __read_mostly = { - .family = NFPROTO_INET, - .nhooks = NF_INET_NUMHOOKS, - .owner = THIS_MODULE, - .nops = 2, - .hook_ops_init = nft_inet_hook_ops_init, -}; - -static int __net_init nf_tables_inet_init_net(struct net *net) -{ - net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); - if (net->nft.inet == NULL) - return -ENOMEM; - memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet)); - - if (nft_register_afinfo(net, net->nft.inet) < 0) - goto err; - - return 0; - -err: - kfree(net->nft.inet); - return -ENOMEM; -} - -static void __net_exit nf_tables_inet_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.inet); - kfree(net->nft.inet); + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + + switch (state->pf) { + case NFPROTO_IPV4: + nft_set_pktinfo_ipv4(&pkt, skb); + break; + case NFPROTO_IPV6: + nft_set_pktinfo_ipv6(&pkt, skb); + break; + default: + break; + } + + return nft_do_chain(&pkt, priv); } -static struct pernet_operations nf_tables_inet_net_ops = { - .init = nf_tables_inet_init_net, - .exit = nf_tables_inet_exit_net, -}; - static const struct nf_chain_type filter_inet = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, @@ -76,26 +48,22 @@ static const struct nf_chain_type filter_inet = { (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_inet, + [NF_INET_LOCAL_OUT] = nft_do_chain_inet, + [NF_INET_FORWARD] = nft_do_chain_inet, + [NF_INET_PRE_ROUTING] = nft_do_chain_inet, + [NF_INET_POST_ROUTING] = nft_do_chain_inet, + }, }; static int __init nf_tables_inet_init(void) { - int ret; - - ret = nft_register_chain_type(&filter_inet); - if (ret < 0) - return ret; - - ret = register_pernet_subsys(&nf_tables_inet_net_ops); - if (ret < 0) - nft_unregister_chain_type(&filter_inet); - - return ret; + return nft_register_chain_type(&filter_inet); } static void __exit nf_tables_inet_exit(void) { - unregister_pernet_subsys(&nf_tables_inet_net_ops); nft_unregister_chain_type(&filter_inet); } @@ -104,4 +72,4 @@ module_exit(nf_tables_inet_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_FAMILY(1); +MODULE_ALIAS_NFT_CHAIN(1, "filter"); diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 403432988313..4041fafca934 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -21,66 +21,32 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, { struct nft_pktinfo pkt; + nft_set_pktinfo(&pkt, skb, state); + switch (skb->protocol) { case htons(ETH_P_IP): - nft_set_pktinfo_ipv4_validate(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb); break; case htons(ETH_P_IPV6): - nft_set_pktinfo_ipv6_validate(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb); break; default: - nft_set_pktinfo_unspec(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb); break; } return nft_do_chain(&pkt, priv); } -static struct nft_af_info nft_af_netdev __read_mostly = { - .family = NFPROTO_NETDEV, - .nhooks = NF_NETDEV_NUMHOOKS, - .owner = THIS_MODULE, - .flags = NFT_AF_NEEDS_DEV, - .nops = 1, - .hooks = { - [NF_NETDEV_INGRESS] = nft_do_chain_netdev, - }, -}; - -static int nf_tables_netdev_init_net(struct net *net) -{ - net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); - if (net->nft.netdev == NULL) - return -ENOMEM; - - memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev)); - - if (nft_register_afinfo(net, net->nft.netdev) < 0) - goto err; - - return 0; -err: - kfree(net->nft.netdev); - return -ENOMEM; -} - -static void nf_tables_netdev_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.netdev); - kfree(net->nft.netdev); -} - -static struct pernet_operations nf_tables_netdev_net_ops = { - .init = nf_tables_netdev_init_net, - .exit = nf_tables_netdev_exit_net, -}; - static const struct nf_chain_type nft_filter_chain_netdev = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_NETDEV, .owner = THIS_MODULE, .hook_mask = (1 << NF_NETDEV_INGRESS), + .hooks = { + [NF_NETDEV_INGRESS] = nft_do_chain_netdev, + }, }; static void nft_netdev_event(unsigned long event, struct net_device *dev, @@ -96,7 +62,7 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, __nft_release_basechain(ctx); break; case NETDEV_CHANGENAME: - if (dev->ifindex != basechain->ops[0].dev->ifindex) + if (dev->ifindex != basechain->ops.dev->ifindex) return; strncpy(basechain->dev_name, dev->name, IFNAMSIZ); @@ -108,7 +74,6 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain, *nr; struct nft_ctx ctx = { @@ -120,20 +85,18 @@ static int nf_tables_netdev_event(struct notifier_block *this, return NOTIFY_DONE; nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { - ctx.afi = afi; - if (afi->family != NFPROTO_NETDEV) + list_for_each_entry(table, &ctx.net->nft.tables, list) { + if (table->family != NFPROTO_NETDEV) continue; - list_for_each_entry(table, &afi->tables, list) { - ctx.table = table; - list_for_each_entry_safe(chain, nr, &table->chains, list) { - if (!nft_is_base_chain(chain)) - continue; + ctx.family = table->family; + ctx.table = table; + list_for_each_entry_safe(chain, nr, &table->chains, list) { + if (!nft_is_base_chain(chain)) + continue; - ctx.chain = chain; - nft_netdev_event(event, dev, &ctx); - } + ctx.chain = chain; + nft_netdev_event(event, dev, &ctx); } } nfnl_unlock(NFNL_SUBSYS_NFTABLES); @@ -153,27 +116,21 @@ static int __init nf_tables_netdev_init(void) if (ret) return ret; - ret = register_pernet_subsys(&nf_tables_netdev_net_ops); - if (ret) - goto err1; - ret = register_netdevice_notifier(&nf_tables_netdev_notifier); if (ret) - goto err2; + goto err_register_netdevice_notifier; return 0; -err2: - unregister_pernet_subsys(&nf_tables_netdev_net_ops); -err1: +err_register_netdevice_notifier: nft_unregister_chain_type(&nft_filter_chain_netdev); + return ret; } static void __exit nf_tables_netdev_exit(void) { unregister_netdevice_notifier(&nf_tables_netdev_notifier); - unregister_pernet_subsys(&nf_tables_netdev_net_ops); nft_unregister_chain_type(&nft_filter_chain_netdev); } @@ -182,4 +139,4 @@ module_exit(nf_tables_netdev_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); -MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */ +MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 733d3e4a30d8..03ead8a9e90c 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -37,8 +37,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); rcu_dereference_protected(table[(id)].subsys, \ lockdep_nfnl_is_held((id))) -static char __initdata nfversion[] = "0.30"; - static struct { struct mutex mutex; const struct nfnetlink_subsystem __rcu *subsys; @@ -580,13 +578,11 @@ static int __init nfnetlink_init(void) for (i=0; i<NFNL_SUBSYS_COUNT; i++) mutex_init(&table[i].mutex); - pr_info("Netfilter messages via NETLINK v%s.\n", nfversion); return register_pernet_subsys(&nfnetlink_net_ops); } static void __exit nfnetlink_exit(void) { - pr_info("Removing netfilter NETLINK layer.\n"); unregister_pernet_subsys(&nfnetlink_net_ops); } module_init(nfnetlink_init); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c45e6d4358ab..88d427f9f9e6 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -527,7 +527,6 @@ static int __init nfnl_acct_init(void) goto err_out; } - pr_info("nfnl_acct: registering with nfnetlink.\n"); ret = nfnetlink_subsys_register(&nfnl_acct_subsys); if (ret < 0) { pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); @@ -543,7 +542,6 @@ err_out: static void __exit nfnl_acct_exit(void) { - pr_info("nfnl_acct: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&nfnl_acct_subsys); unregister_pernet_subsys(&nfnl_acct_ops); } diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 32b1c0b44e79..95b04702a655 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -615,8 +615,6 @@ err_out: static void __exit cttimeout_exit(void) { - pr_info("cttimeout: unregistering from nfnetlink.\n"); - nfnetlink_subsys_unregister(&cttimeout_subsys); unregister_pernet_subsys(&cttimeout_ops); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e955bec0acc6..7b46aa4c478d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1054,7 +1054,6 @@ static int nful_open(struct inode *inode, struct file *file) } static const struct file_operations nful_file_ops = { - .owner = THIS_MODULE, .open = nful_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index c09b36755ed7..8bba23160a68 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -941,23 +941,18 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; -static unsigned int nfqnl_nf_hook_drop(struct net *net) +static void nfqnl_nf_hook_drop(struct net *net) { struct nfnl_queue_net *q = nfnl_queue_pernet(net); - unsigned int instances = 0; int i; for (i = 0; i < INSTANCE_BUCKETS; i++) { struct nfqnl_instance *inst; struct hlist_head *head = &q->instance_table[i]; - hlist_for_each_entry_rcu(inst, head, hlist) { + hlist_for_each_entry_rcu(inst, head, hlist) nfqnl_flush(inst, NULL, 0); - instances++; - } } - - return instances; } static int @@ -1482,7 +1477,6 @@ static int nfqnl_open(struct inode *inode, struct file *file) } static const struct file_operations nfqnl_file_ops = { - .owner = THIS_MODULE, .open = nfqnl_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index c2945eb3397c..fa90a8402845 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -44,6 +44,7 @@ static void nft_cmp_eval(const struct nft_expr *expr, case NFT_CMP_LT: if (d == 0) goto mismatch; + /* fall through */ case NFT_CMP_LTE: if (d > 0) goto mismatch; @@ -51,6 +52,7 @@ static void nft_cmp_eval(const struct nft_expr *expr, case NFT_CMP_GT: if (d == 0) goto mismatch; + /* fall through */ case NFT_CMP_GTE: if (d < 0) goto mismatch; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index b89f4f65b2a0..8e23726b9081 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -144,7 +144,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, { par->net = ctx->net; par->table = ctx->table->name; - switch (ctx->afi->family) { + switch (ctx->family) { case AF_INET: entry->e4.ip.proto = proto; entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; @@ -169,13 +169,13 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; par->hook_mask = 1 << ops->hooknum; } else { par->hook_mask = 0; } - par->family = ctx->afi->family; + par->family = ctx->family; par->nft_compat = true; } @@ -267,7 +267,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.net = ctx->net; par.target = target; par.targinfo = info; - par.family = ctx->afi->family; + par.family = ctx->family; if (par.target->destroy != NULL) par.target->destroy(&par); @@ -302,7 +302,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; hook_mask = 1 << ops->hooknum; if (target->hooks && !(hook_mask & target->hooks)) @@ -358,7 +358,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, { par->net = ctx->net; par->table = ctx->table->name; - switch (ctx->afi->family) { + switch (ctx->family) { case AF_INET: entry->e4.ip.proto = proto; entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; @@ -383,13 +383,13 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; par->hook_mask = 1 << ops->hooknum; } else { par->hook_mask = 0; } - par->family = ctx->afi->family; + par->family = ctx->family; par->nft_compat = true; } @@ -446,7 +446,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.net = ctx->net; par.match = match; par.matchinfo = info; - par.family = ctx->afi->family; + par.family = ctx->family; if (par.match->destroy != NULL) par.match->destroy(&par); @@ -481,7 +481,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops[0]; + const struct nf_hook_ops *ops = &basechain->ops; hook_mask = 1 << ops->hooknum; if (match->hooks && !(hook_mask & match->hooks)) @@ -648,7 +648,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, mt_name = nla_data(tb[NFTA_MATCH_NAME]); rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); - family = ctx->afi->family; + family = ctx->family; /* Re-use the existing match if it's already loaded. */ list_for_each_entry(nft_match, &nft_match_list, head) { @@ -733,7 +733,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, tg_name = nla_data(tb[NFTA_TARGET_NAME]); rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); - family = ctx->afi->family; + family = ctx->family; /* Re-use the existing target if it's already loaded. */ list_for_each_entry(nft_target, &nft_target_list, head) { @@ -812,8 +812,6 @@ static int __init nft_compat_module_init(void) goto err_target; } - pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>\n"); - return ret; err_target: diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 2647b895f4b0..6ab274b14484 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -405,7 +405,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_DIRECTION] == NULL) return -EINVAL; - switch (ctx->afi->family) { + switch (ctx->family) { case NFPROTO_IPV4: len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip); @@ -456,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nf_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->family); if (err < 0) return err; @@ -550,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (err < 0) goto err1; - err = nf_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->family); if (err < 0) goto err1; @@ -564,7 +564,7 @@ err1: static void nft_ct_get_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { - nf_ct_netns_put(ctx->net, ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->family); } static void nft_ct_set_destroy(const struct nft_ctx *ctx, @@ -573,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv = nft_expr_priv(expr); __nft_ct_set_destroy(ctx, priv); - nf_ct_netns_put(ctx->net, ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->family); } static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -734,7 +734,7 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, struct nft_ct_helper_obj *priv = nft_obj_data(obj); struct nf_conntrack_helper *help4, *help6; char name[NF_CT_HELPER_NAME_LEN]; - int family = ctx->afi->family; + int family = ctx->family; if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO]) return -EINVAL; @@ -753,14 +753,14 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, switch (family) { case NFPROTO_IPV4: - if (ctx->afi->family == NFPROTO_IPV6) + if (ctx->family == NFPROTO_IPV6) return -EINVAL; help4 = nf_conntrack_helper_try_module_get(name, family, priv->l4proto); break; case NFPROTO_IPV6: - if (ctx->afi->family == NFPROTO_IPV4) + if (ctx->family == NFPROTO_IPV4) return -EINVAL; help6 = nf_conntrack_helper_try_module_get(name, family, diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 66221ad891a9..fc83e29d6634 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -164,7 +164,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); - err = nft_validate_register_load(priv->sreg_key, set->klen);; + err = nft_validate_register_load(priv->sreg_key, set->klen); if (err < 0) return err; @@ -184,7 +184,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_EXPR] != NULL) { if (!(set->flags & NFT_SET_EVAL)) return -EINVAL; - if (!(set->flags & NFT_SET_ANONYMOUS)) + if (!nft_set_is_anonymous(set)) return -EOPNOTSUPP; priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c new file mode 100644 index 000000000000..4503b8dcf9c0 --- /dev/null +++ b/net/netfilter/nft_flow_offload.c @@ -0,0 +1,264 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/workqueue.h> +#include <linux/spinlock.h> +#include <linux/netfilter/nf_tables.h> +#include <net/ip.h> /* for ipv4 options. */ +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <net/netfilter/nf_flow_table.h> + +struct nft_flow_offload { + struct nft_flowtable *flowtable; +}; + +static int nft_flow_route(const struct nft_pktinfo *pkt, + const struct nf_conn *ct, + struct nf_flow_route *route, + enum ip_conntrack_dir dir) +{ + struct dst_entry *this_dst = skb_dst(pkt->skb); + struct dst_entry *other_dst = NULL; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; + break; + case NFPROTO_IPV6: + fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6; + break; + } + + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); + if (!other_dst) + return -ENOENT; + + route->tuple[dir].dst = this_dst; + route->tuple[dir].ifindex = nft_in(pkt)->ifindex; + route->tuple[!dir].dst = other_dst; + route->tuple[!dir].ifindex = nft_out(pkt)->ifindex; + + return 0; +} + +static bool nft_flow_offload_skip(struct sk_buff *skb) +{ + struct ip_options *opt = &(IPCB(skb)->opt); + + if (unlikely(opt->optlen)) + return true; + if (skb_sec_path(skb)) + return true; + + return false; +} + +static void nft_flow_offload_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_flow_offload *priv = nft_expr_priv(expr); + struct nf_flowtable *flowtable = &priv->flowtable->data; + enum ip_conntrack_info ctinfo; + struct nf_flow_route route; + struct flow_offload *flow; + enum ip_conntrack_dir dir; + struct nf_conn *ct; + int ret; + + if (nft_flow_offload_skip(pkt->skb)) + goto out; + + ct = nf_ct_get(pkt->skb, &ctinfo); + if (!ct) + goto out; + + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + goto out; + } + + if (test_bit(IPS_HELPER_BIT, &ct->status)) + goto out; + + if (ctinfo == IP_CT_NEW || + ctinfo == IP_CT_RELATED) + goto out; + + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) + goto out; + + dir = CTINFO2DIR(ctinfo); + if (nft_flow_route(pkt, ct, &route, dir) < 0) + goto err_flow_route; + + flow = flow_offload_alloc(ct, &route); + if (!flow) + goto err_flow_alloc; + + ret = flow_offload_add(flowtable, flow); + if (ret < 0) + goto err_flow_add; + + return; + +err_flow_add: + flow_offload_free(flow); +err_flow_alloc: + dst_release(route.tuple[!dir].dst); +err_flow_route: + clear_bit(IPS_OFFLOAD_BIT, &ct->status); +out: + regs->verdict.code = NFT_BREAK; +} + +static int nft_flow_offload_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + unsigned int hook_mask = (1 << NF_INET_FORWARD); + + return nft_chain_validate_hooks(ctx->chain, hook_mask); +} + +static int nft_flow_offload_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_flow_offload *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); + struct nft_flowtable *flowtable; + + if (!tb[NFTA_FLOW_TABLE_NAME]) + return -EINVAL; + + flowtable = nf_tables_flowtable_lookup(ctx->table, + tb[NFTA_FLOW_TABLE_NAME], + genmask); + if (IS_ERR(flowtable)) + return PTR_ERR(flowtable); + + priv->flowtable = flowtable; + flowtable->use++; + + return nf_ct_netns_get(ctx->net, ctx->family); +} + +static void nft_flow_offload_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_flow_offload *priv = nft_expr_priv(expr); + + priv->flowtable->use--; + nf_ct_netns_put(ctx->net, ctx->family); +} + +static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_flow_offload *priv = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_flow_offload_type; +static const struct nft_expr_ops nft_flow_offload_ops = { + .type = &nft_flow_offload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)), + .eval = nft_flow_offload_eval, + .init = nft_flow_offload_init, + .destroy = nft_flow_offload_destroy, + .validate = nft_flow_offload_validate, + .dump = nft_flow_offload_dump, +}; + +static struct nft_expr_type nft_flow_offload_type __read_mostly = { + .name = "flow_offload", + .ops = &nft_flow_offload_ops, + .maxattr = NFTA_FLOW_MAX, + .owner = THIS_MODULE, +}; + +static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data) +{ + struct net_device *dev = data; + + if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) + return; + + flow_offload_dead(flow); +} + +static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable, + void *data) +{ + nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data); +} + +static int flow_offload_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (event != NETDEV_DOWN) + return NOTIFY_DONE; + + nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev); + + return NOTIFY_DONE; +} + +static struct notifier_block flow_offload_netdev_notifier = { + .notifier_call = flow_offload_netdev_event, +}; + +static int __init nft_flow_offload_module_init(void) +{ + int err; + + register_netdevice_notifier(&flow_offload_netdev_notifier); + + err = nft_register_expr(&nft_flow_offload_type); + if (err < 0) + goto register_expr; + + return 0; + +register_expr: + unregister_netdevice_notifier(&flow_offload_netdev_notifier); + return err; +} + +static void __exit nft_flow_offload_module_exit(void) +{ + struct net *net; + + nft_unregister_expr(&nft_flow_offload_type); + unregister_netdevice_notifier(&flow_offload_netdev_notifier); + rtnl_lock(); + for_each_net(net) + nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL); + rtnl_unlock(); +} + +module_init(nft_flow_offload_module_init); +module_exit(nft_flow_offload_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_EXPR("flow_offload"); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 6f6e64423643..a27be36dc0af 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -112,7 +112,7 @@ static int nft_log_init(const struct nft_ctx *ctx, break; } - err = nf_logger_find_get(ctx->afi->family, li->type); + err = nf_logger_find_get(ctx->family, li->type); if (err < 0) goto err1; @@ -133,7 +133,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx, if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); - nf_logger_put(ctx->afi->family, li->type); + nf_logger_put(ctx->family, li->type); } static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 6ac03d4266c9..9d8655bc1bea 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -73,7 +73,7 @@ int nft_masq_init(const struct nft_ctx *ctx, } } - return nf_ct_netns_get(ctx->net, ctx->afi->family); + return nf_ct_netns_get(ctx->net, ctx->family); } EXPORT_SYMBOL_GPL(nft_masq_init); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 5a60eb23a7ed..8fb91940e2e7 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -210,6 +210,11 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = prandom_u32_state(state); break; } +#ifdef CONFIG_XFRM + case NFT_META_SECPATH: + nft_reg_store8(dest, !!skb->sp); + break; +#endif default: WARN_ON(1); goto err; @@ -308,6 +313,11 @@ int nft_meta_get_init(const struct nft_ctx *ctx, prandom_init_once(&nft_prandom_state); len = sizeof(u32); break; +#ifdef CONFIG_XFRM + case NFT_META_SECPATH: + len = sizeof(u8); + break; +#endif default: return -EOPNOTSUPP; } @@ -318,6 +328,38 @@ int nft_meta_get_init(const struct nft_ctx *ctx, } EXPORT_SYMBOL_GPL(nft_meta_get_init); +static int nft_meta_get_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ +#ifdef CONFIG_XFRM + const struct nft_meta *priv = nft_expr_priv(expr); + unsigned int hooks; + + if (priv->key != NFT_META_SECPATH) + return 0; + + switch (ctx->family) { + case NFPROTO_NETDEV: + hooks = 1 << NF_NETDEV_INGRESS; + break; + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); + break; + default: + return -EOPNOTSUPP; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +#else + return 0; +#endif +} + int nft_meta_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -328,7 +370,7 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, if (priv->key != NFT_META_PKTTYPE) return 0; - switch (ctx->afi->family) { + switch (ctx->family) { case NFPROTO_BRIDGE: hooks = 1 << NF_BR_PRE_ROUTING; break; @@ -434,6 +476,7 @@ static const struct nft_expr_ops nft_meta_get_ops = { .eval = nft_meta_get_eval, .init = nft_meta_get_init, .dump = nft_meta_get_dump, + .validate = nft_meta_get_validate, }; static const struct nft_expr_ops nft_meta_set_ops = { diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index ed548d06b6dd..1f36954c2ba9 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -142,7 +142,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); - if (family != ctx->afi->family) + if (family != ctx->family) return -EOPNOTSUPP; switch (family) { diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 1e66538bf0ff..c64cbe78dee7 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -75,7 +75,7 @@ int nft_redir_init(const struct nft_ctx *ctx, return -EINVAL; } - return nf_ct_netns_get(ctx->net, ctx->afi->family); + return nf_ct_netns_get(ctx->net, ctx->family); } EXPORT_SYMBOL_GPL(nft_redir_init); diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index a6b7d05aeacf..11a2071b6dd4 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -27,7 +27,7 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb { u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst); const struct sk_buff *skb = pkt->skb; - const struct nf_afinfo *ai; + struct dst_entry *dst = NULL; struct flowi fl; memset(&fl, 0, sizeof(fl)); @@ -43,15 +43,10 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb break; } - ai = nf_get_afinfo(nft_pf(pkt)); - if (ai) { - struct dst_entry *dst = NULL; - - ai->route(nft_net(pkt), &dst, &fl, false); - if (dst) { - mtu = min(mtu, dst_mtu(dst)); - dst_release(dst); - } + nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt)); + if (dst) { + mtu = min(mtu, dst_mtu(dst)); + dst_release(dst); } if (mtu <= minlen || mtu > 0xffff) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index f8166c1d5430..3f1624ee056f 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -251,11 +251,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (err) return; - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) { - iter->err = err; - goto out; - } + rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { @@ -306,9 +302,7 @@ static void nft_rhash_gc(struct work_struct *work) if (err) goto schedule; - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) - goto out; + rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c new file mode 100644 index 000000000000..0b660c568156 --- /dev/null +++ b/net/netfilter/utils.c @@ -0,0 +1,90 @@ +#include <linux/kernel.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_queue.h> + +__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol, + unsigned short family) +{ + const struct nf_ipv6_ops *v6ops; + __sum16 csum = 0; + + switch (family) { + case AF_INET: + csum = nf_ip_checksum(skb, hook, dataoff, protocol); + break; + case AF_INET6: + v6ops = rcu_dereference(nf_ipv6_ops); + if (v6ops) + csum = v6ops->checksum(skb, hook, dataoff, protocol); + break; + } + + return csum; +} +EXPORT_SYMBOL_GPL(nf_checksum); + +__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, unsigned int len, + u_int8_t protocol, unsigned short family) +{ + const struct nf_ipv6_ops *v6ops; + __sum16 csum = 0; + + switch (family) { + case AF_INET: + csum = nf_ip_checksum_partial(skb, hook, dataoff, len, + protocol); + break; + case AF_INET6: + v6ops = rcu_dereference(nf_ipv6_ops); + if (v6ops) + csum = v6ops->checksum_partial(skb, hook, dataoff, len, + protocol); + break; + } + + return csum; +} +EXPORT_SYMBOL_GPL(nf_checksum_partial); + +int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, + bool strict, unsigned short family) +{ + const struct nf_ipv6_ops *v6ops; + int ret = 0; + + switch (family) { + case AF_INET: + ret = nf_ip_route(net, dst, fl, strict); + break; + case AF_INET6: + v6ops = rcu_dereference(nf_ipv6_ops); + if (v6ops) + ret = v6ops->route(net, dst, fl, strict); + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(nf_route); + +int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) +{ + const struct nf_ipv6_ops *v6ops; + int ret = 0; + + switch (entry->state.pf) { + case AF_INET: + ret = nf_ip_reroute(skb, entry); + break; + case AF_INET6: + v6ops = rcu_dereference(nf_ipv6_ops); + if (v6ops) + ret = v6ops->reroute(skb, entry); + break; + } + return ret; +} diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 55802e97f906..0b56bf05c169 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1027,7 +1027,7 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -/* Find table by name, grabs mutex & ref. Returns NULL on error. */ +/* Find table by name, grabs mutex & ref. Returns ERR_PTR on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { @@ -1043,17 +1043,17 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, /* Table doesn't exist in this netns, re-try init */ list_for_each_entry(t, &init_net.xt.tables[af], list) { + int err; + if (strcmp(t->name, name)) continue; - if (!try_module_get(t->me)) { - mutex_unlock(&xt[af].mutex); - return NULL; - } - + if (!try_module_get(t->me)) + goto out; mutex_unlock(&xt[af].mutex); - if (t->table_init(net) != 0) { + err = t->table_init(net); + if (err < 0) { module_put(t->me); - return NULL; + return ERR_PTR(err); } found = t; @@ -1073,10 +1073,28 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, module_put(found->me); out: mutex_unlock(&xt[af].mutex); - return NULL; + return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(xt_find_table_lock); +struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af, + const char *name) +{ + struct xt_table *t = xt_find_table_lock(net, af, name); + +#ifdef CONFIG_MODULES + if (IS_ERR(t)) { + int err = request_module("%stable_%s", xt_prefix[af], name); + if (err < 0) + return ERR_PTR(err); + t = xt_find_table_lock(net, af, name); + } +#endif + + return t; +} +EXPORT_SYMBOL_GPL(xt_request_find_table_lock); + void xt_table_unlock(struct xt_table *table) { mutex_unlock(&xt[table->af].mutex); @@ -1344,7 +1362,6 @@ static int xt_table_open(struct inode *inode, struct file *file) } static const struct file_operations xt_table_ops = { - .owner = THIS_MODULE, .open = xt_table_open, .read = seq_read, .llseek = seq_lseek, @@ -1397,7 +1414,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos, trav->curr = trav->curr->next; if (trav->curr != trav->head) break; - /* fallthru, _stop will unlock */ + /* fall through */ default: return NULL; } @@ -1480,7 +1497,6 @@ static int xt_match_open(struct inode *inode, struct file *file) } static const struct file_operations xt_match_ops = { - .owner = THIS_MODULE, .open = xt_match_open, .read = seq_read, .llseek = seq_lseek, @@ -1533,7 +1549,6 @@ static int xt_target_open(struct inode *inode, struct file *file) } static const struct file_operations xt_target_ops = { - .owner = THIS_MODULE, .open = xt_target_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 9dae4d665965..99bb8e410f22 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -48,7 +48,6 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net, unsigned int family) { struct flowi fl; - const struct nf_afinfo *ai; struct rtable *rt = NULL; u_int32_t mtu = ~0U; @@ -62,10 +61,8 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net, memset(fl6, 0, sizeof(*fl6)); fl6->daddr = ipv6_hdr(skb)->saddr; } - ai = nf_get_afinfo(family); - if (ai != NULL) - ai->route(net, (struct dst_entry **)&rt, &fl, false); + nf_route(net, (struct dst_entry **)&rt, &fl, false, family); if (rt != NULL) { mtu = dst_mtu(&rt->dst); dst_release(&rt->dst); diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 3b2be2ae6987..911a7c0da504 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip6t_addrtype"); static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, const struct in6_addr *addr, u16 mask) { - const struct nf_afinfo *afinfo; + const struct nf_ipv6_ops *v6ops; struct flowi6 flow; struct rt6_info *rt; u32 ret = 0; @@ -47,17 +47,14 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (dev) flow.flowi6_oif = dev->ifindex; - afinfo = nf_get_afinfo(NFPROTO_IPV6); - if (afinfo != NULL) { - const struct nf_ipv6_ops *v6ops; - + v6ops = nf_get_ipv6_ops(); + if (v6ops) { if (dev && (mask & XT_ADDRTYPE_LOCAL)) { - v6ops = nf_get_ipv6_ops(); - if (v6ops && v6ops->chk_addr(net, addr, dev, true)) + if (v6ops->chk_addr(net, addr, dev, true)) ret = XT_ADDRTYPE_LOCAL; } - route_err = afinfo->route(net, (struct dst_entry **)&rt, - flowi6_to_flowi(&flow), false); + route_err = v6ops->route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), false); } else { route_err = 1; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index a6214f235333..b1b17b9353e1 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -12,292 +12,30 @@ * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/in.h> -#include <linux/in6.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/jhash.h> -#include <linux/slab.h> -#include <linux/list.h> -#include <linux/rbtree.h> + #include <linux/module.h> -#include <linux/random.h> #include <linux/skbuff.h> -#include <linux/spinlock.h> -#include <linux/netfilter/nf_conntrack_tcp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connlimit.h> + #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_zones.h> - -#define CONNLIMIT_SLOTS 256U - -#ifdef CONFIG_LOCKDEP -#define CONNLIMIT_LOCK_SLOTS 8U -#else -#define CONNLIMIT_LOCK_SLOTS 256U -#endif - -#define CONNLIMIT_GC_MAX_NODES 8 - -/* we will save the tuples of all connections we care about */ -struct xt_connlimit_conn { - struct hlist_node node; - struct nf_conntrack_tuple tuple; -}; - -struct xt_connlimit_rb { - struct rb_node node; - struct hlist_head hhead; /* connections/hosts in same subnet */ - union nf_inet_addr addr; /* search key */ -}; - -static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp; - -struct xt_connlimit_data { - struct rb_root climit_root[CONNLIMIT_SLOTS]; -}; - -static u_int32_t connlimit_rnd __read_mostly; -static struct kmem_cache *connlimit_rb_cachep __read_mostly; -static struct kmem_cache *connlimit_conn_cachep __read_mostly; - -static inline unsigned int connlimit_iphash(__be32 addr) -{ - return jhash_1word((__force __u32)addr, - connlimit_rnd) % CONNLIMIT_SLOTS; -} - -static inline unsigned int -connlimit_iphash6(const union nf_inet_addr *addr) -{ - return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), - connlimit_rnd) % CONNLIMIT_SLOTS; -} - -static inline bool already_closed(const struct nf_conn *conn) -{ - if (nf_ct_protonum(conn) == IPPROTO_TCP) - return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT || - conn->proto.tcp.state == TCP_CONNTRACK_CLOSE; - else - return 0; -} - -static int -same_source(const union nf_inet_addr *addr, - const union nf_inet_addr *u3, u_int8_t family) -{ - if (family == NFPROTO_IPV4) - return ntohl(addr->ip) - ntohl(u3->ip); - - return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6)); -} - -static bool add_hlist(struct hlist_head *head, - const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr) -{ - struct xt_connlimit_conn *conn; - - conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); - if (conn == NULL) - return false; - conn->tuple = *tuple; - hlist_add_head(&conn->node, head); - return true; -} - -static unsigned int check_hlist(struct net *net, - struct hlist_head *head, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit) -{ - const struct nf_conntrack_tuple_hash *found; - struct xt_connlimit_conn *conn; - struct hlist_node *n; - struct nf_conn *found_ct; - unsigned int length = 0; - - *addit = true; - - /* check the saved connections */ - hlist_for_each_entry_safe(conn, n, head, node) { - found = nf_conntrack_find_get(net, zone, &conn->tuple); - if (found == NULL) { - hlist_del(&conn->node); - kmem_cache_free(connlimit_conn_cachep, conn); - continue; - } - - found_ct = nf_ct_tuplehash_to_ctrack(found); - - if (nf_ct_tuple_equal(&conn->tuple, tuple)) { - /* - * Just to be sure we have it only once in the list. - * We should not see tuples twice unless someone hooks - * this into a table without "-p tcp --syn". - */ - *addit = false; - } else if (already_closed(found_ct)) { - /* - * we do not care about connections which are - * closed already -> ditch it - */ - nf_ct_put(found_ct); - hlist_del(&conn->node); - kmem_cache_free(connlimit_conn_cachep, conn); - continue; - } - - nf_ct_put(found_ct); - length++; - } - - return length; -} - -static void tree_nodes_free(struct rb_root *root, - struct xt_connlimit_rb *gc_nodes[], - unsigned int gc_count) -{ - struct xt_connlimit_rb *rbconn; - - while (gc_count) { - rbconn = gc_nodes[--gc_count]; - rb_erase(&rbconn->node, root); - kmem_cache_free(connlimit_rb_cachep, rbconn); - } -} - -static unsigned int -count_tree(struct net *net, struct rb_root *root, - const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr, - u8 family, const struct nf_conntrack_zone *zone) -{ - struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; - struct rb_node **rbnode, *parent; - struct xt_connlimit_rb *rbconn; - struct xt_connlimit_conn *conn; - unsigned int gc_count; - bool no_gc = false; - - restart: - gc_count = 0; - parent = NULL; - rbnode = &(root->rb_node); - while (*rbnode) { - int diff; - bool addit; - - rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node); - - parent = *rbnode; - diff = same_source(addr, &rbconn->addr, family); - if (diff < 0) { - rbnode = &((*rbnode)->rb_left); - } else if (diff > 0) { - rbnode = &((*rbnode)->rb_right); - } else { - /* same source network -> be counted! */ - unsigned int count; - count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit); - - tree_nodes_free(root, gc_nodes, gc_count); - if (!addit) - return count; - - if (!add_hlist(&rbconn->hhead, tuple, addr)) - return 0; /* hotdrop */ - - return count + 1; - } - - if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) - continue; - - /* only used for GC on hhead, retval and 'addit' ignored */ - check_hlist(net, &rbconn->hhead, tuple, zone, &addit); - if (hlist_empty(&rbconn->hhead)) - gc_nodes[gc_count++] = rbconn; - } - - if (gc_count) { - no_gc = true; - tree_nodes_free(root, gc_nodes, gc_count); - /* tree_node_free before new allocation permits - * allocator to re-use newly free'd object. - * - * This is a rare event; in most cases we will find - * existing node to re-use. (or gc_count is 0). - */ - goto restart; - } - - /* no match, need to insert new node */ - rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC); - if (rbconn == NULL) - return 0; - - conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); - if (conn == NULL) { - kmem_cache_free(connlimit_rb_cachep, rbconn); - return 0; - } - - conn->tuple = *tuple; - rbconn->addr = *addr; - - INIT_HLIST_HEAD(&rbconn->hhead); - hlist_add_head(&conn->node, &rbconn->hhead); - - rb_link_node(&rbconn->node, parent, rbnode); - rb_insert_color(&rbconn->node, root); - return 1; -} - -static int count_them(struct net *net, - struct xt_connlimit_data *data, - const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr, - u_int8_t family, - const struct nf_conntrack_zone *zone) -{ - struct rb_root *root; - int count; - u32 hash; - - if (family == NFPROTO_IPV6) - hash = connlimit_iphash6(addr); - else - hash = connlimit_iphash(addr->ip); - root = &data->climit_root[hash]; - - spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); - - count = count_tree(net, root, tuple, addr, family, zone); - - spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); - - return count; -} +#include <net/netfilter/nf_conntrack_count.h> static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = xt_net(par); const struct xt_connlimit_info *info = par->matchinfo; - union nf_inet_addr addr; struct nf_conntrack_tuple tuple; const struct nf_conntrack_tuple *tuple_ptr = &tuple; const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int connections; + u32 key[5]; ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) { @@ -310,6 +48,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); + union nf_inet_addr addr; unsigned int i; memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? @@ -317,22 +56,24 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i) addr.ip6[i] &= info->mask.ip6[i]; + memcpy(key, &addr, sizeof(addr.ip6)); + key[4] = zone->id; } else { const struct iphdr *iph = ip_hdr(skb); - addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? + key[0] = (info->flags & XT_CONNLIMIT_DADDR) ? iph->daddr : iph->saddr; - addr.ip &= info->mask.ip; + key[0] &= info->mask.ip; + key[1] = zone->id; } - connections = count_them(net, info->data, tuple_ptr, &addr, - xt_family(par), zone); + connections = nf_conncount_count(net, info->data, key, + xt_family(par), tuple_ptr, zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; - return (connections > info->limit) ^ - !!(info->flags & XT_CONNLIMIT_INVERT); + return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT); hotdrop: par->hotdrop = true; @@ -342,61 +83,27 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) static int connlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_connlimit_info *info = par->matchinfo; - unsigned int i; - int ret; + unsigned int keylen; - net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); - - ret = nf_ct_netns_get(par->net, par->family); - if (ret < 0) { - pr_info("cannot load conntrack support for " - "address family %u\n", par->family); - return ret; - } + keylen = sizeof(u32); + if (par->family == NFPROTO_IPV6) + keylen += sizeof(struct in6_addr); + else + keylen += sizeof(struct in_addr); /* init private data */ - info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); - if (info->data == NULL) { - nf_ct_netns_put(par->net, par->family); - return -ENOMEM; - } - - for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i) - info->data->climit_root[i] = RB_ROOT; + info->data = nf_conncount_init(par->net, par->family, keylen); + if (IS_ERR(info->data)) + return PTR_ERR(info->data); return 0; } -static void destroy_tree(struct rb_root *r) -{ - struct xt_connlimit_conn *conn; - struct xt_connlimit_rb *rbconn; - struct hlist_node *n; - struct rb_node *node; - - while ((node = rb_first(r)) != NULL) { - rbconn = rb_entry(node, struct xt_connlimit_rb, node); - - rb_erase(node, r); - - hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) - kmem_cache_free(connlimit_conn_cachep, conn); - - kmem_cache_free(connlimit_rb_cachep, rbconn); - } -} - static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; - unsigned int i; - - nf_ct_netns_put(par->net, par->family); - - for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i) - destroy_tree(&info->data->climit_root[i]); - kfree(info->data); + nf_conncount_destroy(par->net, par->family, info->data); } static struct xt_match connlimit_mt_reg __read_mostly = { @@ -413,40 +120,12 @@ static struct xt_match connlimit_mt_reg __read_mostly = { static int __init connlimit_mt_init(void) { - int ret, i; - - BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS); - BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0); - - for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i) - spin_lock_init(&xt_connlimit_locks[i]); - - connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn", - sizeof(struct xt_connlimit_conn), - 0, 0, NULL); - if (!connlimit_conn_cachep) - return -ENOMEM; - - connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb", - sizeof(struct xt_connlimit_rb), - 0, 0, NULL); - if (!connlimit_rb_cachep) { - kmem_cache_destroy(connlimit_conn_cachep); - return -ENOMEM; - } - ret = xt_register_match(&connlimit_mt_reg); - if (ret != 0) { - kmem_cache_destroy(connlimit_conn_cachep); - kmem_cache_destroy(connlimit_rb_cachep); - } - return ret; + return xt_register_match(&connlimit_mt_reg); } static void __exit connlimit_mt_exit(void) { xt_unregister_match(&connlimit_mt_reg); - kmem_cache_destroy(connlimit_conn_cachep); - kmem_cache_destroy(connlimit_rb_cachep); } module_init(connlimit_mt_init); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 5da8746f7b88..ca6847403ca2 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -353,7 +353,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, static bool select_all(const struct xt_hashlimit_htable *ht, const struct dsthash_ent *he) { - return 1; + return true; } static bool select_gc(const struct xt_hashlimit_htable *ht, @@ -1266,7 +1266,6 @@ static int dl_proc_open(struct inode *inode, struct file *file) } static const struct file_operations dl_file_ops_v2 = { - .owner = THIS_MODULE, .open = dl_proc_open_v2, .read = seq_read, .llseek = seq_lseek, @@ -1274,7 +1273,6 @@ static const struct file_operations dl_file_ops_v2 = { }; static const struct file_operations dl_file_ops_v1 = { - .owner = THIS_MODULE, .open = dl_proc_open_v1, .read = seq_read, .llseek = seq_lseek, @@ -1282,7 +1280,6 @@ static const struct file_operations dl_file_ops_v1 = { }; static const struct file_operations dl_file_ops = { - .owner = THIS_MODULE, .open = dl_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index 000e70377f85..7ca64a50db04 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -58,7 +58,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par) */ pr_debug("Dropping evil IPComp tinygram.\n"); par->hotdrop = true; - return 0; + return false; } return spi_match(compinfo->spis[0], compinfo->spis[1], diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 2b4ab189bba7..5639fb03bdd9 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -93,7 +93,8 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, if (dst->xfrm == NULL) return -1; - for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + for (i = 0; dst && dst->xfrm; + dst = ((struct xfrm_dst *)dst)->child, i++) { pos = strict ? i : 0; if (pos >= info->len) return 0; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 64285702afd5..16b6b11ee83f 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -39,13 +39,17 @@ match_set(ip_set_id_t index, const struct sk_buff *skb, return inv; } -#define ADT_OPT(n, f, d, fs, cfs, t) \ -struct ip_set_adt_opt n = { \ - .family = f, \ - .dim = d, \ - .flags = fs, \ - .cmdflags = cfs, \ - .ext.timeout = t, \ +#define ADT_OPT(n, f, d, fs, cfs, t, p, b, po, bo) \ +struct ip_set_adt_opt n = { \ + .family = f, \ + .dim = d, \ + .flags = fs, \ + .cmdflags = cfs, \ + .ext.timeout = t, \ + .ext.packets = p, \ + .ext.bytes = b, \ + .ext.packets_op = po, \ + .ext.bytes_op = bo, \ } /* Revision 0 interface: backward compatible with netfilter/iptables */ @@ -56,7 +60,8 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v0 *info = par->matchinfo; ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim, - info->match_set.u.compat.flags, 0, UINT_MAX); + info->match_set.u.compat.flags, 0, UINT_MAX, + 0, 0, 0, 0); return match_set(info->match_set.index, skb, par, &opt, info->match_set.u.compat.flags & IPSET_INV_MATCH); @@ -119,7 +124,8 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v1 *info = par->matchinfo; ADT_OPT(opt, xt_family(par), info->match_set.dim, - info->match_set.flags, 0, UINT_MAX); + info->match_set.flags, 0, UINT_MAX, + 0, 0, 0, 0); if (opt.flags & IPSET_RETURN_NOMATCH) opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; @@ -161,45 +167,21 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par) /* Revision 3 match */ static bool -match_counter0(u64 counter, const struct ip_set_counter_match0 *info) -{ - switch (info->op) { - case IPSET_COUNTER_NONE: - return true; - case IPSET_COUNTER_EQ: - return counter == info->value; - case IPSET_COUNTER_NE: - return counter != info->value; - case IPSET_COUNTER_LT: - return counter < info->value; - case IPSET_COUNTER_GT: - return counter > info->value; - } - return false; -} - -static bool set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v3 *info = par->matchinfo; - int ret; ADT_OPT(opt, xt_family(par), info->match_set.dim, - info->match_set.flags, info->flags, UINT_MAX); + info->match_set.flags, info->flags, UINT_MAX, + info->packets.value, info->bytes.value, + info->packets.op, info->bytes.op); if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; - ret = match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); - - if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) - return ret; - - if (!match_counter0(opt.ext.packets, &info->packets)) - return false; - return match_counter0(opt.ext.bytes, &info->bytes); + return match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); } #define set_match_v3_checkentry set_match_v1_checkentry @@ -208,45 +190,21 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) /* Revision 4 match */ static bool -match_counter(u64 counter, const struct ip_set_counter_match *info) -{ - switch (info->op) { - case IPSET_COUNTER_NONE: - return true; - case IPSET_COUNTER_EQ: - return counter == info->value; - case IPSET_COUNTER_NE: - return counter != info->value; - case IPSET_COUNTER_LT: - return counter < info->value; - case IPSET_COUNTER_GT: - return counter > info->value; - } - return false; -} - -static bool set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v4 *info = par->matchinfo; - int ret; ADT_OPT(opt, xt_family(par), info->match_set.dim, - info->match_set.flags, info->flags, UINT_MAX); + info->match_set.flags, info->flags, UINT_MAX, + info->packets.value, info->bytes.value, + info->packets.op, info->bytes.op); if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; - ret = match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); - - if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) - return ret; - - if (!match_counter(opt.ext.packets, &info->packets)) - return false; - return match_counter(opt.ext.bytes, &info->bytes); + return match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); } #define set_match_v4_checkentry set_match_v1_checkentry @@ -260,9 +218,11 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_set_info_target_v0 *info = par->targinfo; ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim, - info->add_set.u.compat.flags, 0, UINT_MAX); + info->add_set.u.compat.flags, 0, UINT_MAX, + 0, 0, 0, 0); ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim, - info->del_set.u.compat.flags, 0, UINT_MAX); + info->del_set.u.compat.flags, 0, UINT_MAX, + 0, 0, 0, 0); if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); @@ -333,9 +293,11 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_set_info_target_v1 *info = par->targinfo; ADT_OPT(add_opt, xt_family(par), info->add_set.dim, - info->add_set.flags, 0, UINT_MAX); + info->add_set.flags, 0, UINT_MAX, + 0, 0, 0, 0); ADT_OPT(del_opt, xt_family(par), info->del_set.dim, - info->del_set.flags, 0, UINT_MAX); + info->del_set.flags, 0, UINT_MAX, + 0, 0, 0, 0); if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); @@ -402,9 +364,11 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_set_info_target_v2 *info = par->targinfo; ADT_OPT(add_opt, xt_family(par), info->add_set.dim, - info->add_set.flags, info->flags, info->timeout); + info->add_set.flags, info->flags, info->timeout, + 0, 0, 0, 0); ADT_OPT(del_opt, xt_family(par), info->del_set.dim, - info->del_set.flags, 0, UINT_MAX); + info->del_set.flags, 0, UINT_MAX, + 0, 0, 0, 0); /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && @@ -432,11 +396,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) int ret; ADT_OPT(add_opt, xt_family(par), info->add_set.dim, - info->add_set.flags, info->flags, info->timeout); + info->add_set.flags, info->flags, info->timeout, + 0, 0, 0, 0); ADT_OPT(del_opt, xt_family(par), info->del_set.dim, - info->del_set.flags, 0, UINT_MAX); + info->del_set.flags, 0, UINT_MAX, + 0, 0, 0, 0); ADT_OPT(map_opt, xt_family(par), info->map_set.dim, - info->map_set.flags, 0, UINT_MAX); + info->map_set.flags, 0, UINT_MAX, + 0, 0, 0, 0); /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 84a4e4c3be4b..2ad445c1d27c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -65,6 +65,7 @@ #include <linux/net_namespace.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/sock.h> #include <net/scm.h> #include <net/netlink.h> @@ -145,8 +146,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); static BLOCKING_NOTIFIER_HEAD(netlink_chain); -static DEFINE_SPINLOCK(netlink_tap_lock); -static struct list_head netlink_tap_all __read_mostly; static const struct rhashtable_params netlink_rhashtable_params; @@ -173,14 +172,24 @@ static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, return new; } +static unsigned int netlink_tap_net_id; + +struct netlink_tap_net { + struct list_head netlink_tap_all; + struct mutex netlink_tap_lock; +}; + int netlink_add_tap(struct netlink_tap *nt) { + struct net *net = dev_net(nt->dev); + struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); + if (unlikely(nt->dev->type != ARPHRD_NETLINK)) return -EINVAL; - spin_lock(&netlink_tap_lock); - list_add_rcu(&nt->list, &netlink_tap_all); - spin_unlock(&netlink_tap_lock); + mutex_lock(&nn->netlink_tap_lock); + list_add_rcu(&nt->list, &nn->netlink_tap_all); + mutex_unlock(&nn->netlink_tap_lock); __module_get(nt->module); @@ -190,12 +199,14 @@ EXPORT_SYMBOL_GPL(netlink_add_tap); static int __netlink_remove_tap(struct netlink_tap *nt) { + struct net *net = dev_net(nt->dev); + struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); bool found = false; struct netlink_tap *tmp; - spin_lock(&netlink_tap_lock); + mutex_lock(&nn->netlink_tap_lock); - list_for_each_entry(tmp, &netlink_tap_all, list) { + list_for_each_entry(tmp, &nn->netlink_tap_all, list) { if (nt == tmp) { list_del_rcu(&nt->list); found = true; @@ -205,7 +216,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt) pr_warn("__netlink_remove_tap: %p not found\n", nt); out: - spin_unlock(&netlink_tap_lock); + mutex_unlock(&nn->netlink_tap_lock); if (found) module_put(nt->module); @@ -224,6 +235,26 @@ int netlink_remove_tap(struct netlink_tap *nt) } EXPORT_SYMBOL_GPL(netlink_remove_tap); +static __net_init int netlink_tap_init_net(struct net *net) +{ + struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); + + INIT_LIST_HEAD(&nn->netlink_tap_all); + mutex_init(&nn->netlink_tap_lock); + return 0; +} + +static void __net_exit netlink_tap_exit_net(struct net *net) +{ +} + +static struct pernet_operations netlink_tap_net_ops = { + .init = netlink_tap_init_net, + .exit = netlink_tap_exit_net, + .id = &netlink_tap_net_id, + .size = sizeof(struct netlink_tap_net), +}; + static bool netlink_filter_tap(const struct sk_buff *skb) { struct sock *sk = skb->sk; @@ -277,7 +308,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, return ret; } -static void __netlink_deliver_tap(struct sk_buff *skb) +static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn) { int ret; struct netlink_tap *tmp; @@ -285,19 +316,21 @@ static void __netlink_deliver_tap(struct sk_buff *skb) if (!netlink_filter_tap(skb)) return; - list_for_each_entry_rcu(tmp, &netlink_tap_all, list) { + list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) { ret = __netlink_deliver_tap_skb(skb, tmp->dev); if (unlikely(ret)) break; } } -static void netlink_deliver_tap(struct sk_buff *skb) +static void netlink_deliver_tap(struct net *net, struct sk_buff *skb) { + struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); + rcu_read_lock(); - if (unlikely(!list_empty(&netlink_tap_all))) - __netlink_deliver_tap(skb); + if (unlikely(!list_empty(&nn->netlink_tap_all))) + __netlink_deliver_tap(skb, nn); rcu_read_unlock(); } @@ -306,7 +339,7 @@ static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, struct sk_buff *skb) { if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) - netlink_deliver_tap(skb); + netlink_deliver_tap(sock_net(dst), skb); } static void netlink_overrun(struct sock *sk) @@ -1216,7 +1249,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; - netlink_deliver_tap(skb); + netlink_deliver_tap(sock_net(sk), skb); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); @@ -2482,8 +2515,9 @@ static int netlink_walk_start(struct nl_seq_iter *iter) return err; } - err = rhashtable_walk_start(&iter->hti); - return err == -EAGAIN ? 0 : err; + rhashtable_walk_start(&iter->hti); + + return 0; } static void netlink_walk_stop(struct nl_seq_iter *iter) @@ -2604,7 +2638,6 @@ static int netlink_seq_open(struct inode *inode, struct file *file) } static const struct file_operations netlink_seq_fops = { - .owner = THIS_MODULE, .open = netlink_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -2734,12 +2767,11 @@ static int __init netlink_proto_init(void) } } - INIT_LIST_HEAD(&netlink_tap_all); - netlink_add_usersock_entry(); sock_register(&netlink_family_ops); register_pernet_subsys(&netlink_net_ops); + register_pernet_subsys(&netlink_tap_net_ops); /* The netlink device handler may be needed early. */ rtnetlink_init(); out: diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 8faa20b4d457..7dda33b9b784 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -115,11 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, if (!s_num) rhashtable_walk_enter(&tbl->hash, hti); - ret = rhashtable_walk_start(hti); - if (ret == -EAGAIN) - ret = 0; - if (ret) - goto stop; + rhashtable_walk_start(hti); while ((nlsk = rhashtable_walk_next(hti))) { if (IS_ERR(nlsk)) { @@ -146,8 +142,8 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, } } -stop: rhashtable_walk_stop(hti); + if (ret) goto done; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7ed9d4422a73..9ba30c63be3d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1344,7 +1344,6 @@ static int nr_info_open(struct inode *inode, struct file *file) } static const struct file_operations nr_info_fops = { - .owner = THIS_MODULE, .open = nr_info_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 75e6ba970fde..b5a7dcb30991 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -901,7 +901,6 @@ static int nr_node_info_open(struct inode *inode, struct file *file) } const struct file_operations nr_nodes_fops = { - .owner = THIS_MODULE, .open = nr_node_info_open, .read = seq_read, .llseek = seq_lseek, @@ -968,7 +967,6 @@ static int nr_neigh_info_open(struct inode *inode, struct file *file) } const struct file_operations nr_neigh_fops = { - .owner = THIS_MODULE, .open = nr_neigh_info_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index b27c5c6d9cab..62f36cc938ca 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1266,14 +1266,14 @@ static int parse_nat(const struct nlattr *attr, /* Do not allow flags if no type is given. */ if (info->range.flags) { OVS_NLERR(log, - "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n" + "NAT flags may be given only when NAT range (SRC or DST) is also specified." ); return -EINVAL; } info->nat = OVS_CT_NAT; /* NAT existing connections. */ } else if (!info->commit) { OVS_NLERR(log, - "NAT attributes may be specified only when CT COMMIT flag is also specified.\n" + "NAT attributes may be specified only when CT COMMIT flag is also specified." ); return -EINVAL; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index f039064ce922..56b8e7167790 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -56,12 +56,12 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) { - struct timespec cur_ts; + struct timespec64 cur_ts; u64 cur_ms, idle_ms; - ktime_get_ts(&cur_ts); + ktime_get_ts64(&cur_ts); idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); - cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + + cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC + cur_ts.tv_nsec / NSEC_PER_MSEC; return cur_ms - idle_ms; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index f143908b651d..7322aa1e382e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,7 @@ #include <net/mpls.h> #include <net/vxlan.h> #include <net/tun_proto.h> +#include <net/erspan.h> #include "flow_netlink.h" @@ -329,7 +330,8 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ - /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and + * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ @@ -400,6 +402,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] .next = ovs_vxlan_ext_key_lens }, [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE }, }; static const struct ovs_len_tbl @@ -631,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, return 0; } +static int erspan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + unsigned long opt_key_offset; + + BUILD_BUG_ON(sizeof(struct erspan_metadata) > + sizeof(match->key->tun_opts)); + + if (nla_len(a) > sizeof(match->key->tun_opts)) { + OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).", + nla_len(a), sizeof(match->key->tun_opts)); + return -EINVAL; + } + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, + sizeof(struct erspan_metadata), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + + opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), + nla_len(a), is_mask); + return 0; +} + static int ip_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -738,6 +768,20 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_PAD: break; + case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = erspan_tun_opt_from_nlattr(a, match, is_mask, + log); + if (err) + return err; + + tun_flags |= TUNNEL_ERSPAN_OPT; + opts_type = type; + break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); @@ -862,6 +906,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, else if (output->tun_flags & TUNNEL_VXLAN_OPT && vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_ERSPAN_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; } return 0; @@ -2454,7 +2502,7 @@ static int validate_geneve_opts(struct sw_flow_key *key) option = (struct geneve_opt *)((u8 *)option + len); opts_len -= len; - }; + } key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; @@ -2486,8 +2534,10 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: break; + case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: + break; } - }; + } start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); if (start < 0) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 3fbfc78991ac..04b94281a30b 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -488,7 +488,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, long long int max_bucket_size; band = &meter->bands[i]; - max_bucket_size = (band->burst_size + band->rate) * 1000; + max_bucket_size = (band->burst_size + band->rate) * 1000LL; band->bucket += delta_ms * band->rate; if (band->bucket > max_bucket_size) diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 04a3128adcf0..bb95c43aae76 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -16,7 +16,6 @@ * 02110-1301, USA */ -#include <linux/hardirq.h> #include <linux/if_vlan.h> #include <linux/kernel.h> #include <linux/netdevice.h> @@ -126,18 +125,12 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) } } -static void internal_set_rx_headroom(struct net_device *dev, int new_hr) -{ - dev->needed_headroom = new_hr < 0 ? 0 : new_hr; -} - static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_get_stats64 = internal_get_stats, - .ndo_set_rx_headroom = internal_set_rx_headroom, }; static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { @@ -154,7 +147,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | - IFF_PHONY_HEADROOM | IFF_NO_QUEUE; + IFF_NO_QUEUE; netdev->needs_free_netdev = true; netdev->priv_destructor = internal_dev_destructor; netdev->ethtool_ops = &internal_dev_ethtool_ops; @@ -195,7 +188,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) err = -ENOMEM; goto error_free_netdev; } - vport->dev->needed_headroom = vport->dp->max_headroom; dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); internal_dev = internal_dev_priv(vport->dev); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3b4d6a3cf190..1d1483007e46 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -247,12 +247,13 @@ static int packet_direct_xmit(struct sk_buff *skb) struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; + bool again = false; if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) goto drop; - skb = validate_xmit_skb_list(skb, dev); + skb = validate_xmit_skb_list(skb, dev, &again); if (skb != orig_skb) goto drop; @@ -4530,7 +4531,6 @@ static int packet_seq_open(struct inode *inode, struct file *file) } static const struct file_operations packet_seq_fops = { - .owner = THIS_MODULE, .open = packet_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index da754fc926e7..871eaf2cb85e 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -299,16 +299,21 @@ out: int __init phonet_netlink_register(void) { - int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, - NULL, 0); + int err = rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWADDR, + addr_doit, NULL, 0); if (err) return err; - /* Further __rtnl_register() cannot fail */ - __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0); - __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0); - __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0); - __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0); - __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0); + /* Further rtnl_register_module() cannot fail */ + rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELADDR, + addr_doit, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETADDR, + NULL, getaddr_dumpit, 0); + rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWROUTE, + route_doit, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE, + route_doit, NULL, 0); + rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE, + NULL, route_dumpit, 0); return 0; } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 44417480dab7..08f6751d2030 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -635,7 +635,6 @@ static int pn_sock_open(struct inode *inode, struct file *file) } const struct file_operations pn_sock_seq_fops = { - .owner = THIS_MODULE, .open = pn_sock_open, .read = seq_read, .llseek = seq_lseek, @@ -818,7 +817,6 @@ static int pn_res_open(struct inode *inode, struct file *file) } const struct file_operations pn_res_seq_fops = { - .owner = THIS_MODULE, .open = pn_res_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 77ab05e23001..5fb3929e3d7d 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1116,9 +1116,13 @@ static int __init qrtr_proto_init(void) return rc; } - rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0); + rc = rtnl_register_module(THIS_MODULE, PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0); + if (rc) { + sock_unregister(qrtr_family.family); + proto_unregister(&qrtr_proto); + } - return 0; + return rc; } postcore_initcall(qrtr_proto_init); diff --git a/net/rds/bind.c b/net/rds/bind.c index 75d43dc8e96b..5aa3a64aa4f0 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -114,6 +114,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) rs, &addr, (int)ntohs(*port)); break; } else { + rs->rs_bound_addr = 0; rds_sock_put(rs); ret = -ENOMEM; break; diff --git a/net/rds/cong.c b/net/rds/cong.c index 8398fee7c866..8d19fd25dce3 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -219,7 +219,11 @@ void rds_cong_queue_updates(struct rds_cong_map *map) spin_lock_irqsave(&rds_cong_lock, flags); list_for_each_entry(conn, &map->m_conn_list, c_map_item) { - if (!test_and_set_bit(0, &conn->c_map_queued)) { + struct rds_conn_path *cp = &conn->c_path[0]; + + rcu_read_lock(); + if (!test_and_set_bit(0, &conn->c_map_queued) && + !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { rds_stats_inc(s_cong_update_queued); /* We cannot inline the call to rds_send_xmit() here * for two reasons (both pertaining to a TCP transport): @@ -235,9 +239,9 @@ void rds_cong_queue_updates(struct rds_cong_map *map) * therefore trigger warnings. * Defer the xmit to rds_send_worker() instead. */ - queue_delayed_work(rds_wq, - &conn->c_path[0].cp_send_w, 0); + queue_delayed_work(rds_wq, &cp->cp_send_w, 0); } + rcu_read_unlock(); } spin_unlock_irqrestore(&rds_cong_lock, flags); diff --git a/net/rds/connection.c b/net/rds/connection.c index 7ee2d5d68b78..b10c0ef36d8d 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -230,8 +230,8 @@ static struct rds_connection *__rds_conn_create(struct net *net, rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n", conn, &laddr, &faddr, - trans->t_name ? trans->t_name : "[unknown]", - is_outgoing ? "(outgoing)" : ""); + strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name : + "[unknown]", is_outgoing ? "(outgoing)" : ""); /* * Since we ran without holding the conn lock, someone could @@ -382,10 +382,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) { struct rds_message *rm, *rtmp; + set_bit(RDS_DESTROY_PENDING, &cp->cp_flags); + if (!cp->cp_transport_data) return; /* make sure lingering queued work won't try to ref the conn */ + synchronize_rcu(); cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); @@ -403,6 +406,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) if (cp->cp_xmit_rm) rds_message_put(cp->cp_xmit_rm); + WARN_ON(delayed_work_pending(&cp->cp_send_w)); + WARN_ON(delayed_work_pending(&cp->cp_recv_w)); + WARN_ON(delayed_work_pending(&cp->cp_conn_w)); + WARN_ON(work_pending(&cp->cp_down_w)); + cp->cp_conn->c_trans->conn_free(cp->cp_transport_data); } @@ -424,7 +432,6 @@ void rds_conn_destroy(struct rds_connection *conn) "%pI4\n", conn, &conn->c_laddr, &conn->c_faddr); - conn->c_destroy_in_prog = 1; /* Ensure conn will not be scheduled for reconnect */ spin_lock_irq(&rds_conn_lock); hlist_del_init_rcu(&conn->c_hash_node); @@ -445,7 +452,6 @@ void rds_conn_destroy(struct rds_connection *conn) */ rds_cong_remove_conn(conn); - put_net(conn->c_net); kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); @@ -684,10 +690,13 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) { atomic_set(&cp->cp_state, RDS_CONN_ERROR); - if (!destroy && cp->cp_conn->c_destroy_in_prog) + rcu_read_lock(); + if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + rcu_read_unlock(); return; - + } queue_work(rds_wq, &cp->cp_down_w); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rds_conn_path_drop); @@ -704,9 +713,15 @@ EXPORT_SYMBOL_GPL(rds_conn_drop); */ void rds_conn_path_connect_if_down(struct rds_conn_path *cp) { + rcu_read_lock(); + if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + rcu_read_unlock(); + return; + } if (rds_conn_path_state(cp) == RDS_CONN_DOWN && !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); diff --git a/net/rds/rds.h b/net/rds/rds.h index c349c71babff..374ae83b60d4 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -88,6 +88,7 @@ enum { #define RDS_RECONNECT_PENDING 1 #define RDS_IN_XMIT 2 #define RDS_RECV_REFILL 3 +#define RDS_DESTROY_PENDING 4 /* Max number of multipaths per RDS connection. Must be a power of 2 */ #define RDS_MPATH_WORKERS 8 @@ -139,8 +140,7 @@ struct rds_connection { __be32 c_faddr; unsigned int c_loopback:1, c_ping_triggered:1, - c_destroy_in_prog:1, - c_pad_to_32:29; + c_pad_to_32:30; int c_npaths; struct rds_connection *c_passive; struct rds_transport *c_trans; @@ -150,7 +150,7 @@ struct rds_connection { /* Protocol version */ unsigned int c_version; - struct net *c_net; + possible_net_t c_net; struct list_head c_map_item; unsigned long c_map_queued; @@ -165,13 +165,13 @@ struct rds_connection { static inline struct net *rds_conn_net(struct rds_connection *conn) { - return conn->c_net; + return read_pnet(&conn->c_net); } static inline void rds_conn_net_set(struct rds_connection *conn, struct net *net) { - conn->c_net = get_net(net); + write_pnet(&conn->c_net, net); } #define RDS_FLAG_CONG_BITMAP 0x01 diff --git a/net/rds/send.c b/net/rds/send.c index f72466c63f0c..d3e32d1f3c7d 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -162,6 +162,12 @@ restart: goto out; } + if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + release_in_xmit(cp); + ret = -ENETUNREACH; /* dont requeue send work */ + goto out; + } + /* * we record the send generation after doing the xmit acquire. * if someone else manages to jump in and do some work, we'll use @@ -437,7 +443,12 @@ over_batch: !list_empty(&cp->cp_send_queue)) && !raced) { if (batch_count < send_batch_count) goto restart; - queue_delayed_work(rds_wq, &cp->cp_send_w, 1); + rcu_read_lock(); + if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + ret = -ENETUNREACH; + else + queue_delayed_work(rds_wq, &cp->cp_send_w, 1); + rcu_read_unlock(); } else if (raced) { rds_stats_inc(s_send_lock_queue_raced); } @@ -1151,6 +1162,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) else cpath = &conn->c_path[0]; + if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) { + ret = -EAGAIN; + goto out; + } + rds_conn_path_connect_if_down(cpath); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); @@ -1190,9 +1206,17 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) rds_stats_inc(s_send_queued); ret = rds_send_xmit(cpath); - if (ret == -ENOMEM || ret == -EAGAIN) - queue_delayed_work(rds_wq, &cpath->cp_send_w, 1); - + if (ret == -ENOMEM || ret == -EAGAIN) { + ret = 0; + rcu_read_lock(); + if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) + ret = -ENETUNREACH; + else + queue_delayed_work(rds_wq, &cpath->cp_send_w, 1); + rcu_read_unlock(); + } + if (ret) + goto out; rds_message_put(rm); return payload_len; @@ -1270,7 +1294,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport, rds_stats_inc(s_send_pong); /* schedule the send work on rds_wq */ - queue_delayed_work(rds_wq, &cp->cp_send_w, 1); + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + queue_delayed_work(rds_wq, &cp->cp_send_w, 1); + rcu_read_unlock(); rds_message_put(rm); return 0; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index ab7356e0ba83..9920d2f84eff 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -271,16 +271,33 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) return -EADDRNOTAVAIL; } +static void rds_tcp_conn_free(void *arg) +{ + struct rds_tcp_connection *tc = arg; + unsigned long flags; + + rdsdebug("freeing tc %p\n", tc); + + spin_lock_irqsave(&rds_tcp_conn_lock, flags); + if (!tc->t_tcp_node_detached) + list_del(&tc->t_tcp_node); + spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); + + kmem_cache_free(rds_tcp_conn_slab, tc); +} + static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) { struct rds_tcp_connection *tc; - int i; + int i, j; + int ret = 0; for (i = 0; i < RDS_MPATH_WORKERS; i++) { tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); - if (!tc) - return -ENOMEM; - + if (!tc) { + ret = -ENOMEM; + break; + } mutex_init(&tc->t_conn_path_lock); tc->t_sock = NULL; tc->t_tinc = NULL; @@ -291,26 +308,17 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) tc->t_cpath = &conn->c_path[i]; spin_lock_irq(&rds_tcp_conn_lock); + tc->t_tcp_node_detached = false; list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); spin_unlock_irq(&rds_tcp_conn_lock); rdsdebug("rds_conn_path [%d] tc %p\n", i, conn->c_path[i].cp_transport_data); } - - return 0; -} - -static void rds_tcp_conn_free(void *arg) -{ - struct rds_tcp_connection *tc = arg; - unsigned long flags; - rdsdebug("freeing tc %p\n", tc); - - spin_lock_irqsave(&rds_tcp_conn_lock, flags); - list_del(&tc->t_tcp_node); - spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); - - kmem_cache_free(rds_tcp_conn_slab, tc); + if (ret) { + for (j = 0; j < i; j++) + rds_tcp_conn_free(conn->c_path[j].cp_transport_data); + } + return ret; } static bool list_has_conn(struct list_head *list, struct rds_connection *conn) @@ -496,27 +504,6 @@ static struct pernet_operations rds_tcp_net_ops = { .size = sizeof(struct rds_tcp_net), }; -/* explicitly send a RST on each socket, thereby releasing any socket refcnts - * that may otherwise hold up netns deletion. - */ -static void rds_tcp_conn_paths_destroy(struct rds_connection *conn) -{ - struct rds_conn_path *cp; - struct rds_tcp_connection *tc; - int i; - struct sock *sk; - - for (i = 0; i < RDS_MPATH_WORKERS; i++) { - cp = &conn->c_path[i]; - tc = cp->cp_transport_data; - if (!tc->t_sock) - continue; - sk = tc->t_sock->sk; - sk->sk_prot->disconnect(sk, 0); - tcp_done(sk); - } -} - static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; @@ -528,18 +515,20 @@ static void rds_tcp_kill_sock(struct net *net) rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = tc->t_cpath->cp_conn->c_net; + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { list_move_tail(&tc->t_tcp_node, &tmp_list); + } else { + list_del(&tc->t_tcp_node); + tc->t_tcp_node_detached = true; + } } spin_unlock_irq(&rds_tcp_conn_lock); - list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn); + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) rds_conn_destroy(tc->t_cpath->cp_conn); - } } void *rds_tcp_listen_sock_def_readable(struct net *net) @@ -587,7 +576,7 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = tc->t_cpath->cp_conn->c_net; + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 864ca7d8f019..c6fa080e9b6d 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -12,6 +12,7 @@ struct rds_tcp_incoming { struct rds_tcp_connection { struct list_head t_tcp_node; + bool t_tcp_node_detached; struct rds_conn_path *t_cpath; /* t_conn_path_lock synchronizes the connection establishment between * rds_tcp_accept_one and rds_tcp_conn_path_connect diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 46f74dad0e16..534c67aeb20f 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) cp->cp_conn, tc, sock); if (sock) { - if (cp->cp_conn->c_destroy_in_prog) + if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) rds_tcp_set_linger(sock); sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); lock_sock(sock->sk); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index e006ef8e6d40..dd707b9e73e5 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -321,8 +321,12 @@ void rds_tcp_data_ready(struct sock *sk) ready = tc->t_orig_data_ready; rds_tcp_stats_inc(s_tcp_data_ready_calls); - if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) - queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); + if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) { + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); + rcu_read_unlock(); + } out: read_unlock_bh(&sk->sk_callback_lock); ready(sk); diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 9b76e0fa1722..16f65744d984 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -202,8 +202,11 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + rcu_read_lock(); + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf && + !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) queue_delayed_work(rds_wq, &cp->cp_send_w, 0); + rcu_read_unlock(); out: read_unlock_bh(&sk->sk_callback_lock); diff --git a/net/rds/threads.c b/net/rds/threads.c index f121daa402c8..eb76db1360b0 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -87,8 +87,12 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) cp->cp_reconnect_jiffies = 0; set_bit(0, &cp->cp_conn->c_map_queued); - queue_delayed_work(rds_wq, &cp->cp_send_w, 0); - queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + queue_delayed_work(rds_wq, &cp->cp_send_w, 0); + queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); + } + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rds_connect_path_complete); @@ -133,7 +137,10 @@ void rds_queue_reconnect(struct rds_conn_path *cp) set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); if (cp->cp_reconnect_jiffies == 0) { cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; - queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); + rcu_read_unlock(); return; } @@ -141,8 +148,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp) rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n", rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, conn, &conn->c_laddr, &conn->c_faddr); - queue_delayed_work(rds_wq, &cp->cp_conn_w, - rand % cp->cp_reconnect_jiffies); + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + queue_delayed_work(rds_wq, &cp->cp_conn_w, + rand % cp->cp_reconnect_jiffies); + rcu_read_unlock(); cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2, rds_sysctl_reconnect_max_jiffies); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 6a5c4992cf61..083bd251406f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1461,7 +1461,6 @@ static int rose_info_open(struct inode *inode, struct file *file) } static const struct file_operations rose_info_fops = { - .owner = THIS_MODULE, .open = rose_info_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 8ca3124df83f..178619ddab68 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -1156,7 +1156,6 @@ static int rose_nodes_open(struct inode *inode, struct file *file) } const struct file_operations rose_nodes_fops = { - .owner = THIS_MODULE, .open = rose_nodes_open, .read = seq_read, .llseek = seq_lseek, @@ -1240,7 +1239,6 @@ static int rose_neigh_open(struct inode *inode, struct file *file) } const struct file_operations rose_neigh_fops = { - .owner = THIS_MODULE, .open = rose_neigh_open, .read = seq_read, .llseek = seq_lseek, @@ -1326,7 +1324,6 @@ static int rose_route_open(struct inode *inode, struct file *file) } const struct file_operations rose_routes_fops = { - .owner = THIS_MODULE, .open = rose_route_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 7421656963a9..f79f260c6ddc 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -125,7 +125,6 @@ static int rxrpc_call_seq_open(struct inode *inode, struct file *file) } const struct file_operations rxrpc_call_seq_fops = { - .owner = THIS_MODULE, .open = rxrpc_call_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -217,7 +216,6 @@ static int rxrpc_connection_seq_open(struct inode *inode, struct file *file) } const struct file_operations rxrpc_connection_seq_fops = { - .owner = THIS_MODULE, .open = rxrpc_connection_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/sched/Kconfig b/net/sched/Kconfig index c03d86a7775e..f24a6ae6819a 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -857,17 +857,14 @@ config NET_ACT_TUNNEL_KEY config NET_IFE_SKBMARK tristate "Support to encoding decoding skb mark on IFE action" depends on NET_ACT_IFE - ---help--- config NET_IFE_SKBPRIO tristate "Support to encoding decoding skb prio on IFE action" depends on NET_ACT_IFE - ---help--- config NET_IFE_SKBTCINDEX tristate "Support to encoding decoding skb tcindex on IFE action" depends on NET_ACT_IFE - ---help--- config NET_CLS_IND bool "Incoming device classification" diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4d33a50a8a6d..52622a3d2517 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -99,7 +99,7 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) p->tcfa_refcnt--; if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) { if (p->ops->cleanup) - p->ops->cleanup(p, bind); + p->ops->cleanup(p); tcf_idr_remove(p->idrinfo, p); ret = ACT_P_DELETED; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 5ef8ce8c83d4..b3f2c15affa7 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -357,7 +357,7 @@ out: return ret; } -static void tcf_bpf_cleanup(struct tc_action *act, int bind) +static void tcf_bpf_cleanup(struct tc_action *act) { struct tcf_bpf_cfg tmp; @@ -401,16 +401,14 @@ static __net_init int bpf_init_net(struct net *net) return tc_action_net_init(tn, &act_bpf_ops); } -static void __net_exit bpf_exit_net(struct net *net) +static void __net_exit bpf_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, bpf_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, bpf_net_id); } static struct pernet_operations bpf_net_ops = { .init = bpf_init_net, - .exit = bpf_exit_net, + .exit_batch = bpf_exit_net, .id = &bpf_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 10b7a8855a6c..2b15ba84e0c8 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -209,16 +209,14 @@ static __net_init int connmark_init_net(struct net *net) return tc_action_net_init(tn, &act_connmark_ops); } -static void __net_exit connmark_exit_net(struct net *net) +static void __net_exit connmark_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, connmark_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, connmark_net_id); } static struct pernet_operations connmark_net_ops = { .init = connmark_init_net, - .exit = connmark_exit_net, + .exit_batch = connmark_exit_net, .id = &connmark_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index d836f998117b..b7ba9b06b147 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -49,6 +49,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, int bind) { struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tcf_csum_params *params_old, *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tc_csum *parm; struct tcf_csum *p; @@ -67,7 +68,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (!tcf_idr_check(tn, parm->index, a, bind)) { ret = tcf_idr_create(tn, parm->index, est, a, - &act_csum_ops, bind, false); + &act_csum_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; @@ -80,10 +81,21 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } p = to_tcf_csum(*a); - spin_lock_bh(&p->tcf_lock); - p->tcf_action = parm->action; - p->update_flags = parm->update_flags; - spin_unlock_bh(&p->tcf_lock); + ASSERT_RTNL(); + + params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); + if (unlikely(!params_new)) { + if (ret == ACT_P_CREATED) + tcf_idr_release(*a, bind); + return -ENOMEM; + } + params_old = rtnl_dereference(p->params); + + params_new->action = parm->action; + params_new->update_flags = parm->update_flags; + rcu_assign_pointer(p->params, params_new); + if (params_old) + kfree_rcu(params_old, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -539,19 +551,21 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); - int action; + struct tcf_csum_params *params; u32 update_flags; + int action; + + rcu_read_lock(); + params = rcu_dereference(p->params); - spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); - bstats_update(&p->tcf_bstats, skb); - action = p->tcf_action; - update_flags = p->update_flags; - spin_unlock(&p->tcf_lock); + bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb); + action = params->action; if (unlikely(action == TC_ACT_SHOT)) - goto drop; + goto drop_stats; + update_flags = params->update_flags; switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) @@ -563,13 +577,16 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, break; } +unlock: + rcu_read_unlock(); return action; drop: - spin_lock(&p->tcf_lock); - p->tcf_qstats.drops++; - spin_unlock(&p->tcf_lock); - return TC_ACT_SHOT; + action = TC_ACT_SHOT; + +drop_stats: + qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats)); + goto unlock; } static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, @@ -577,15 +594,18 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); + struct tcf_csum_params *params; struct tc_csum opt = { - .update_flags = p->update_flags, .index = p->tcf_index, - .action = p->tcf_action, .refcnt = p->tcf_refcnt - ref, .bindcnt = p->tcf_bindcnt - bind, }; struct tcf_t t; + params = rtnl_dereference(p->params); + opt.action = params->action; + opt.update_flags = params->update_flags; + if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -600,6 +620,15 @@ nla_put_failure: return -1; } +static void tcf_csum_cleanup(struct tc_action *a) +{ + struct tcf_csum *p = to_tcf_csum(a); + struct tcf_csum_params *params; + + params = rcu_dereference_protected(p->params, 1); + kfree_rcu(params, rcu); +} + static int tcf_csum_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops) @@ -623,6 +652,7 @@ static struct tc_action_ops act_csum_ops = { .act = tcf_csum, .dump = tcf_csum_dump, .init = tcf_csum_init, + .cleanup = tcf_csum_cleanup, .walk = tcf_csum_walker, .lookup = tcf_csum_search, .size = sizeof(struct tcf_csum), @@ -635,16 +665,14 @@ static __net_init int csum_init_net(struct net *net) return tc_action_net_init(tn, &act_csum_ops); } -static void __net_exit csum_exit_net(struct net *net) +static void __net_exit csum_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, csum_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, csum_net_id); } static struct pernet_operations csum_net_ops = { .init = csum_init_net, - .exit = csum_exit_net, + .exit_batch = csum_exit_net, .id = &csum_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index a0ac42b3ed06..b56986d41c87 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -235,16 +235,14 @@ static __net_init int gact_init_net(struct net *net) return tc_action_net_init(tn, &act_gact_ops); } -static void __net_exit gact_exit_net(struct net *net) +static void __net_exit gact_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, gact_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, gact_net_id); } static struct pernet_operations gact_net_ops = { .init = gact_init_net, - .exit = gact_exit_net, + .exit_batch = gact_exit_net, .id = &gact_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 3007cb1310ea..5954e992685a 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -387,7 +387,7 @@ out_nlmsg_trim: } /* under ife->tcf_lock */ -static void _tcf_ife_cleanup(struct tc_action *a, int bind) +static void _tcf_ife_cleanup(struct tc_action *a) { struct tcf_ife_info *ife = to_ife(a); struct tcf_meta_info *e, *n; @@ -405,13 +405,13 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind) } } -static void tcf_ife_cleanup(struct tc_action *a, int bind) +static void tcf_ife_cleanup(struct tc_action *a) { struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; spin_lock_bh(&ife->tcf_lock); - _tcf_ife_cleanup(a, bind); + _tcf_ife_cleanup(a); spin_unlock_bh(&ife->tcf_lock); p = rcu_dereference_protected(ife->params, 1); @@ -546,7 +546,7 @@ metadata_parse_err: if (exists) tcf_idr_release(*a, bind); if (ret == ACT_P_CREATED) - _tcf_ife_cleanup(*a, bind); + _tcf_ife_cleanup(*a); if (exists) spin_unlock_bh(&ife->tcf_lock); @@ -567,7 +567,7 @@ metadata_parse_err: err = use_all_metadata(ife); if (err) { if (ret == ACT_P_CREATED) - _tcf_ife_cleanup(*a, bind); + _tcf_ife_cleanup(*a); if (exists) spin_unlock_bh(&ife->tcf_lock); @@ -858,16 +858,14 @@ static __net_init int ife_init_net(struct net *net) return tc_action_net_init(tn, &act_ife_ops); } -static void __net_exit ife_exit_net(struct net *net) +static void __net_exit ife_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, ife_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, ife_net_id); } static struct pernet_operations ife_net_ops = { .init = ife_init_net, - .exit = ife_exit_net, + .exit_batch = ife_exit_net, .id = &ife_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d9e399a7e3d5..06e380ae0928 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -77,7 +77,7 @@ static void ipt_destroy_target(struct xt_entry_target *t) module_put(par.target->me); } -static void tcf_ipt_release(struct tc_action *a, int bind) +static void tcf_ipt_release(struct tc_action *a) { struct tcf_ipt *ipt = to_ipt(a); ipt_destroy_target(ipt->tcfi_t); @@ -337,16 +337,14 @@ static __net_init int ipt_init_net(struct net *net) return tc_action_net_init(tn, &act_ipt_ops); } -static void __net_exit ipt_exit_net(struct net *net) +static void __net_exit ipt_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, ipt_net_id); } static struct pernet_operations ipt_net_ops = { .init = ipt_init_net, - .exit = ipt_exit_net, + .exit_batch = ipt_exit_net, .id = &ipt_net_id, .size = sizeof(struct tc_action_net), }; @@ -387,16 +385,14 @@ static __net_init int xt_init_net(struct net *net) return tc_action_net_init(tn, &act_xt_ops); } -static void __net_exit xt_exit_net(struct net *net) +static void __net_exit xt_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, xt_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, xt_net_id); } static struct pernet_operations xt_net_ops = { .init = xt_init_net, - .exit = xt_exit_net, + .exit_batch = xt_exit_net, .id = &xt_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 08b61849c2a2..e6ff88f72900 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -29,7 +29,6 @@ #include <net/tc_act/tc_mirred.h> static LIST_HEAD(mirred_list); -static DEFINE_SPINLOCK(mirred_list_lock); static bool tcf_mirred_is_act_redirect(int action) { @@ -50,18 +49,15 @@ static bool tcf_mirred_act_wants_ingress(int action) } } -static void tcf_mirred_release(struct tc_action *a, int bind) +static void tcf_mirred_release(struct tc_action *a) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; - /* We could be called either in a RCU callback or with RTNL lock held. */ - spin_lock_bh(&mirred_list_lock); list_del(&m->tcfm_list); - dev = rcu_dereference_protected(m->tcfm_dev, 1); + dev = rtnl_dereference(m->tcfm_dev); if (dev) dev_put(dev); - spin_unlock_bh(&mirred_list_lock); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -139,8 +135,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, m->tcf_action = parm->action; m->tcfm_eaction = parm->eaction; if (dev != NULL) { - m->tcfm_ifindex = parm->ifindex; - m->net = net; if (ret != ACT_P_CREATED) dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); dev_hold(dev); @@ -149,9 +143,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) { - spin_lock_bh(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); - spin_unlock_bh(&mirred_list_lock); tcf_idr_insert(tn, *a); } @@ -247,13 +239,14 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); + struct net_device *dev = rtnl_dereference(m->tcfm_dev); struct tc_mirred opt = { .index = m->tcf_index, .action = m->tcf_action, .refcnt = m->tcf_refcnt - ref, .bindcnt = m->tcf_bindcnt - bind, .eaction = m->tcfm_eaction, - .ifindex = m->tcfm_ifindex, + .ifindex = dev ? dev->ifindex : 0, }; struct tcf_t t; @@ -294,7 +287,6 @@ static int mirred_device_event(struct notifier_block *unused, ASSERT_RTNL(); if (event == NETDEV_UNREGISTER) { - spin_lock_bh(&mirred_list_lock); list_for_each_entry(m, &mirred_list, tcfm_list) { if (rcu_access_pointer(m->tcfm_dev) == dev) { dev_put(dev); @@ -304,7 +296,6 @@ static int mirred_device_event(struct notifier_block *unused, RCU_INIT_POINTER(m->tcfm_dev, NULL); } } - spin_unlock_bh(&mirred_list_lock); } return NOTIFY_DONE; @@ -318,7 +309,7 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) { struct tcf_mirred *m = to_mirred(a); - return __dev_get_by_index(m->net, m->tcfm_ifindex); + return rtnl_dereference(m->tcfm_dev); } static struct tc_action_ops act_mirred_ops = { @@ -343,16 +334,14 @@ static __net_init int mirred_init_net(struct net *net) return tc_action_net_init(tn, &act_mirred_ops); } -static void __net_exit mirred_exit_net(struct net *net) +static void __net_exit mirred_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, mirred_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, mirred_net_id); } static struct pernet_operations mirred_net_ops = { .init = mirred_init_net, - .exit = mirred_exit_net, + .exit_batch = mirred_exit_net, .id = &mirred_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index c365d01b99c8..98c6a4b2f523 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -310,16 +310,14 @@ static __net_init int nat_init_net(struct net *net) return tc_action_net_init(tn, &act_nat_ops); } -static void __net_exit nat_exit_net(struct net *net) +static void __net_exit nat_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, nat_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, nat_net_id); } static struct pernet_operations nat_net_ops = { .init = nat_init_net, - .exit = nat_exit_net, + .exit_batch = nat_exit_net, .id = &nat_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 491fe5deb09e..349beaffb29e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -216,7 +216,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return ret; } -static void tcf_pedit_cleanup(struct tc_action *a, int bind) +static void tcf_pedit_cleanup(struct tc_action *a) { struct tcf_pedit *p = to_pedit(a); struct tc_pedit_key *keys = p->tcfp_keys; @@ -453,16 +453,14 @@ static __net_init int pedit_init_net(struct net *net) return tc_action_net_init(tn, &act_pedit_ops); } -static void __net_exit pedit_exit_net(struct net *net) +static void __net_exit pedit_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, pedit_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, pedit_net_id); } static struct pernet_operations pedit_net_ops = { .init = pedit_init_net, - .exit = pedit_exit_net, + .exit_batch = pedit_exit_net, .id = &pedit_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 3bb2ebf9e9ae..95d3c9097b25 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -118,13 +118,13 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, police = to_police(*a); if (parm->rate.rate) { err = -ENOMEM; - R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); + R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE], NULL); if (R_tab == NULL) goto failure; if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, - tb[TCA_POLICE_PEAKRATE]); + tb[TCA_POLICE_PEAKRATE], NULL); if (P_tab == NULL) goto failure; } @@ -334,16 +334,14 @@ static __net_init int police_init_net(struct net *net) return tc_action_net_init(tn, &act_police_ops); } -static void __net_exit police_exit_net(struct net *net) +static void __net_exit police_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, police_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, police_net_id); } static struct pernet_operations police_net_ops = { .init = police_init_net, - .exit = police_exit_net, + .exit_batch = police_exit_net, .id = &police_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 9438969290a6..1ba0df238756 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -96,7 +96,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return ret; } -static void tcf_sample_cleanup(struct tc_action *a, int bind) +static void tcf_sample_cleanup(struct tc_action *a) { struct tcf_sample *s = to_sample(a); struct psample_group *psample_group; @@ -236,16 +236,14 @@ static __net_init int sample_init_net(struct net *net) return tc_action_net_init(tn, &act_sample_ops); } -static void __net_exit sample_exit_net(struct net *net) +static void __net_exit sample_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, sample_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, sample_net_id); } static struct pernet_operations sample_net_ops = { .init = sample_init_net, - .exit = sample_exit_net, + .exit_batch = sample_exit_net, .id = &sample_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e7b57e5071a3..425eac11f6da 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, return d->tcf_action; } -static void tcf_simp_release(struct tc_action *a, int bind) +static void tcf_simp_release(struct tc_action *a) { struct tcf_defact *d = to_defact(a); kfree(d->tcfd_defdata); @@ -204,16 +204,14 @@ static __net_init int simp_init_net(struct net *net) return tc_action_net_init(tn, &act_simp_ops); } -static void __net_exit simp_exit_net(struct net *net) +static void __net_exit simp_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, simp_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, simp_net_id); } static struct pernet_operations simp_net_ops = { .init = simp_init_net, - .exit = simp_exit_net, + .exit_batch = simp_exit_net, .id = &simp_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 59949d61f20d..5a3f691bb545 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -241,16 +241,14 @@ static __net_init int skbedit_init_net(struct net *net) return tc_action_net_init(tn, &act_skbedit_ops); } -static void __net_exit skbedit_exit_net(struct net *net) +static void __net_exit skbedit_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, skbedit_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, skbedit_net_id); } static struct pernet_operations skbedit_net_ops = { .init = skbedit_init_net, - .exit = skbedit_exit_net, + .exit_batch = skbedit_exit_net, .id = &skbedit_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index b642ad3d39dd..fa975262dbac 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -184,7 +184,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, return ret; } -static void tcf_skbmod_cleanup(struct tc_action *a, int bind) +static void tcf_skbmod_cleanup(struct tc_action *a) { struct tcf_skbmod *d = to_skbmod(a); struct tcf_skbmod_params *p; @@ -266,16 +266,14 @@ static __net_init int skbmod_init_net(struct net *net) return tc_action_net_init(tn, &act_skbmod_ops); } -static void __net_exit skbmod_exit_net(struct net *net) +static void __net_exit skbmod_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, skbmod_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, skbmod_net_id); } static struct pernet_operations skbmod_net_ops = { .init = skbmod_init_net, - .exit = skbmod_exit_net, + .exit_batch = skbmod_exit_net, .id = &skbmod_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 30c96274c638..0e23aac09ad6 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -201,7 +201,7 @@ err_out: return ret; } -static void tunnel_key_release(struct tc_action *a, int bind) +static void tunnel_key_release(struct tc_action *a) { struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; @@ -325,16 +325,14 @@ static __net_init int tunnel_key_init_net(struct net *net) return tc_action_net_init(tn, &act_tunnel_key_ops); } -static void __net_exit tunnel_key_exit_net(struct net *net) +static void __net_exit tunnel_key_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, tunnel_key_net_id); } static struct pernet_operations tunnel_key_net_ops = { .init = tunnel_key_init_net, - .exit = tunnel_key_exit_net, + .exit_batch = tunnel_key_exit_net, .id = &tunnel_key_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 97f717a13ad5..e1a1b3f3983a 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -219,7 +219,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, return ret; } -static void tcf_vlan_cleanup(struct tc_action *a, int bind) +static void tcf_vlan_cleanup(struct tc_action *a) { struct tcf_vlan *v = to_vlan(a); struct tcf_vlan_params *p; @@ -301,16 +301,14 @@ static __net_init int vlan_init_net(struct net *net) return tc_action_net_init(tn, &act_vlan_ops); } -static void __net_exit vlan_exit_net(struct net *net) +static void __net_exit vlan_exit_net(struct list_head *net_list) { - struct tc_action_net *tn = net_generic(net, vlan_net_id); - - tc_action_net_exit(tn); + tc_action_net_exit(net_list, vlan_net_id); } static struct pernet_operations vlan_net_ops = { .init = vlan_init_net, - .exit = vlan_exit_net, + .exit_batch = vlan_exit_net, .id = &vlan_net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b9d63d2246e6..bcb4ccb5f894 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -24,6 +24,7 @@ #include <linux/init.h> #include <linux/kmod.h> #include <linux/slab.h> +#include <linux/idr.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> @@ -121,8 +122,8 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) } static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, - u32 prio, u32 parent, struct Qdisc *q, - struct tcf_chain *chain) + u32 prio, struct tcf_chain *chain, + struct netlink_ext_ack *extack) { struct tcf_proto *tp; int err; @@ -148,6 +149,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, module_put(tp->ops->owner); err = -EAGAIN; } else { + NL_SET_ERR_MSG(extack, "TC classifier not found"); err = -ENOENT; } goto errout; @@ -156,8 +158,6 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->classify = tp->ops->classify; tp->protocol = protocol; tp->prio = prio; - tp->classid = parent; - tp->q = q; tp->chain = chain; err = tp->ops->init(tp); @@ -172,13 +172,20 @@ errout: return ERR_PTR(err); } -static void tcf_proto_destroy(struct tcf_proto *tp) +static void tcf_proto_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { - tp->ops->destroy(tp); + tp->ops->destroy(tp, extack); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } +struct tcf_filter_chain_list_item { + struct list_head list; + tcf_chain_head_change_t *chain_head_change; + void *chain_head_change_priv; +}; + static struct tcf_chain *tcf_chain_create(struct tcf_block *block, u32 chain_index) { @@ -187,6 +194,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; + INIT_LIST_HEAD(&chain->filter_chain_list); list_add_tail(&chain->list, &block->chain_list); chain->block = block; chain->index = chain_index; @@ -194,12 +202,19 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, return chain; } +static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item, + struct tcf_proto *tp_head) +{ + if (item->chain_head_change) + item->chain_head_change(tp_head, item->chain_head_change_priv); +} static void tcf_chain_head_change(struct tcf_chain *chain, struct tcf_proto *tp_head) { - if (chain->chain_head_change) - chain->chain_head_change(tp_head, - chain->chain_head_change_priv); + struct tcf_filter_chain_list_item *item; + + list_for_each_entry(item, &chain->filter_chain_list, list) + tcf_chain_head_change_item(item, tp_head); } static void tcf_chain_flush(struct tcf_chain *chain) @@ -209,7 +224,7 @@ static void tcf_chain_flush(struct tcf_chain *chain) tcf_chain_head_change(chain, NULL); while (tp) { RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, NULL); tp = rtnl_dereference(chain->filter_chain); tcf_chain_put(chain); } @@ -217,8 +232,12 @@ static void tcf_chain_flush(struct tcf_chain *chain) static void tcf_chain_destroy(struct tcf_chain *chain) { + struct tcf_block *block = chain->block; + list_del(&chain->list); kfree(chain); + if (list_empty(&block->chain_list)) + kfree(block); } static void tcf_chain_hold(struct tcf_chain *chain) @@ -249,62 +268,300 @@ void tcf_chain_put(struct tcf_chain *chain) } EXPORT_SYMBOL(tcf_chain_put); -static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q, - struct tcf_block_ext_info *ei, - enum tc_block_command command) +static bool tcf_block_offload_in_use(struct tcf_block *block) +{ + return block->offloadcnt; +} + +static int tcf_block_offload_cmd(struct tcf_block *block, + struct net_device *dev, + struct tcf_block_ext_info *ei, + enum tc_block_command command) { - struct net_device *dev = q->dev_queue->dev; struct tc_block_offload bo = {}; - if (!dev->netdev_ops->ndo_setup_tc) - return; bo.command = command; bo.binder_type = ei->binder_type; bo.block = block; - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); } -static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, - struct tcf_block_ext_info *ei) +static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) { - tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND); + struct net_device *dev = q->dev_queue->dev; + int err; + + if (!dev->netdev_ops->ndo_setup_tc) + goto no_offload_dev_inc; + + /* If tc offload feature is disabled and the block we try to bind + * to already has some offloaded filters, forbid to bind. + */ + if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) + return -EOPNOTSUPP; + + err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND); + if (err == -EOPNOTSUPP) + goto no_offload_dev_inc; + return err; + +no_offload_dev_inc: + if (tcf_block_offload_in_use(block)) + return -EOPNOTSUPP; + block->nooffloaddevcnt++; + return 0; } static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { - tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND); + struct net_device *dev = q->dev_queue->dev; + int err; + + if (!dev->netdev_ops->ndo_setup_tc) + goto no_offload_dev_dec; + err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND); + if (err == -EOPNOTSUPP) + goto no_offload_dev_dec; + return; + +no_offload_dev_dec: + WARN_ON(block->nooffloaddevcnt-- == 0); } -int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, - struct tcf_block_ext_info *ei) +static int +tcf_chain_head_change_cb_add(struct tcf_chain *chain, + struct tcf_block_ext_info *ei, + struct netlink_ext_ack *extack) +{ + struct tcf_filter_chain_list_item *item; + + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) { + NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed"); + return -ENOMEM; + } + item->chain_head_change = ei->chain_head_change; + item->chain_head_change_priv = ei->chain_head_change_priv; + if (chain->filter_chain) + tcf_chain_head_change_item(item, chain->filter_chain); + list_add(&item->list, &chain->filter_chain_list); + return 0; +} + +static void +tcf_chain_head_change_cb_del(struct tcf_chain *chain, + struct tcf_block_ext_info *ei) +{ + struct tcf_filter_chain_list_item *item; + + list_for_each_entry(item, &chain->filter_chain_list, list) { + if ((!ei->chain_head_change && !ei->chain_head_change_priv) || + (item->chain_head_change == ei->chain_head_change && + item->chain_head_change_priv == ei->chain_head_change_priv)) { + tcf_chain_head_change_item(item, NULL); + list_del(&item->list); + kfree(item); + return; + } + } + WARN_ON(1); +} + +struct tcf_net { + struct idr idr; +}; + +static unsigned int tcf_net_id; + +static int tcf_block_insert(struct tcf_block *block, struct net *net, + u32 block_index, struct netlink_ext_ack *extack) +{ + struct tcf_net *tn = net_generic(net, tcf_net_id); + int err; + + err = idr_alloc_ext(&tn->idr, block, NULL, block_index, + block_index + 1, GFP_KERNEL); + if (err) + return err; + block->index = block_index; + return 0; +} + +static void tcf_block_remove(struct tcf_block *block, struct net *net) +{ + struct tcf_net *tn = net_generic(net, tcf_net_id); + + idr_remove_ext(&tn->idr, block->index); +} + +static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, + struct netlink_ext_ack *extack) { - struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); + struct tcf_block *block; struct tcf_chain *chain; int err; - if (!block) - return -ENOMEM; + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) { + NL_SET_ERR_MSG(extack, "Memory allocation for block failed"); + return ERR_PTR(-ENOMEM); + } INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->cb_list); + INIT_LIST_HEAD(&block->owner_list); /* Create chain 0 by default, it has to be always present. */ chain = tcf_chain_create(block, 0); if (!chain) { + NL_SET_ERR_MSG(extack, "Failed to create new tcf chain"); err = -ENOMEM; goto err_chain_create; } - WARN_ON(!ei->chain_head_change); - chain->chain_head_change = ei->chain_head_change; - chain->chain_head_change_priv = ei->chain_head_change_priv; block->net = qdisc_net(q); + block->refcnt = 1; + block->net = net; block->q = q; - tcf_block_offload_bind(block, q, ei); - *p_block = block; - return 0; + return block; err_chain_create: kfree(block); + return ERR_PTR(err); +} + +static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) +{ + struct tcf_net *tn = net_generic(net, tcf_net_id); + + return idr_find_ext(&tn->idr, block_index); +} + +static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block) +{ + return list_first_entry(&block->chain_list, struct tcf_chain, list); +} + +struct tcf_block_owner_item { + struct list_head list; + struct Qdisc *q; + enum tcf_block_binder_type binder_type; +}; + +static void +tcf_block_owner_netif_keep_dst(struct tcf_block *block, + struct Qdisc *q, + enum tcf_block_binder_type binder_type) +{ + if (block->keep_dst && + binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + netif_keep_dst(qdisc_dev(q)); +} + +void tcf_block_netif_keep_dst(struct tcf_block *block) +{ + struct tcf_block_owner_item *item; + + block->keep_dst = true; + list_for_each_entry(item, &block->owner_list, list) + tcf_block_owner_netif_keep_dst(block, item->q, + item->binder_type); +} +EXPORT_SYMBOL(tcf_block_netif_keep_dst); + +static int tcf_block_owner_add(struct tcf_block *block, + struct Qdisc *q, + enum tcf_block_binder_type binder_type) +{ + struct tcf_block_owner_item *item; + + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + item->q = q; + item->binder_type = binder_type; + list_add(&item->list, &block->owner_list); + return 0; +} + +static void tcf_block_owner_del(struct tcf_block *block, + struct Qdisc *q, + enum tcf_block_binder_type binder_type) +{ + struct tcf_block_owner_item *item; + + list_for_each_entry(item, &block->owner_list, list) { + if (item->q == q && item->binder_type == binder_type) { + list_del(&item->list); + kfree(item); + return; + } + } + WARN_ON(1); +} + +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, + struct tcf_block_ext_info *ei, + struct netlink_ext_ack *extack) +{ + struct net *net = qdisc_net(q); + struct tcf_block *block = NULL; + bool created = false; + int err; + + if (ei->block_index) { + /* block_index not 0 means the shared block is requested */ + block = tcf_block_lookup(net, ei->block_index); + if (block) + block->refcnt++; + } + + if (!block) { + block = tcf_block_create(net, q, extack); + if (IS_ERR(block)) + return PTR_ERR(block); + created = true; + if (ei->block_index) { + err = tcf_block_insert(block, net, + ei->block_index, extack); + if (err) + goto err_block_insert; + } + } + + err = tcf_block_owner_add(block, q, ei->binder_type); + if (err) + goto err_block_owner_add; + + tcf_block_owner_netif_keep_dst(block, q, ei->binder_type); + + err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block), + ei, extack); + if (err) + goto err_chain_head_change_cb_add; + + err = tcf_block_offload_bind(block, q, ei); + if (err) + goto err_block_offload_bind; + + *p_block = block; + return 0; + +err_block_offload_bind: + tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei); +err_chain_head_change_cb_add: + tcf_block_owner_del(block, q, ei->binder_type); +err_block_owner_add: + if (created) { + if (tcf_block_shared(block)) + tcf_block_remove(block, net); +err_block_insert: + kfree(tcf_block_chain_zero(block)); + kfree(block); + } else { + block->refcnt--; + } return err; } EXPORT_SYMBOL(tcf_block_get_ext); @@ -317,7 +574,8 @@ static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv) } int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, + struct netlink_ext_ack *extack) { struct tcf_block_ext_info ei = { .chain_head_change = tcf_chain_head_change_dflt, @@ -325,53 +583,47 @@ int tcf_block_get(struct tcf_block **p_block, }; WARN_ON(!p_filter_chain); - return tcf_block_get_ext(p_block, q, &ei); + return tcf_block_get_ext(p_block, q, &ei, extack); } EXPORT_SYMBOL(tcf_block_get); -static void tcf_block_put_final(struct work_struct *work) -{ - struct tcf_block *block = container_of(work, struct tcf_block, work); - struct tcf_chain *chain, *tmp; - - rtnl_lock(); - - /* At this point, all the chains should have refcnt == 1. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) - tcf_chain_put(chain); - rtnl_unlock(); - kfree(block); -} - /* XXX: Standalone actions are not allowed to jump to any chain, and bound * actions should be all removed after flushing. */ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain; + struct tcf_chain *chain, *tmp; if (!block) return; - /* Hold a refcnt for all chains, except 0, so that they don't disappear - * while we are iterating. - */ - list_for_each_entry(chain, &block->chain_list, list) - if (chain->index) + tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei); + tcf_block_owner_del(block, q, ei->binder_type); + + if (--block->refcnt == 0) { + if (tcf_block_shared(block)) + tcf_block_remove(block, block->net); + + /* Hold a refcnt for all chains, so that they don't disappear + * while we are iterating. + */ + list_for_each_entry(chain, &block->chain_list, list) tcf_chain_hold(chain); - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_flush(chain); + list_for_each_entry(chain, &block->chain_list, list) + tcf_chain_flush(chain); + } tcf_block_offload_unbind(block, q, ei); - INIT_WORK(&block->work, tcf_block_put_final); - /* Wait for existing RCU callbacks to cool down, make sure their works - * have been queued before this. We can not flush pending works here - * because we are holding the RTNL lock. - */ - rcu_barrier(); - tcf_queue_work(&block->work); + if (block->refcnt == 0) { + /* At this point, all the chains should have refcnt >= 1. */ + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + tcf_chain_put(chain); + + /* Finally, put chain 0 and allow block to be freed. */ + tcf_chain_put(tcf_block_chain_zero(block)); + } } EXPORT_SYMBOL(tcf_block_put_ext); @@ -429,9 +681,16 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, { struct tcf_block_cb *block_cb; + /* At this point, playback of previous block cb calls is not supported, + * so forbid to register to block which already has some offloaded + * filters present. + */ + if (tcf_block_offload_in_use(block)) + return ERR_PTR(-EOPNOTSUPP); + block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL); if (!block_cb) - return NULL; + return ERR_PTR(-ENOMEM); block_cb->cb = cb; block_cb->cb_ident = cb_ident; block_cb->cb_priv = cb_priv; @@ -447,7 +706,7 @@ int tcf_block_cb_register(struct tcf_block *block, struct tcf_block_cb *block_cb; block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv); - return block_cb ? 0 : -ENOMEM; + return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0; } EXPORT_SYMBOL(tcf_block_cb_register); @@ -477,6 +736,10 @@ static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type, int ok_count = 0; int err; + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) + return -EOPNOTSUPP; + list_for_each_entry(block_cb, &block->cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { @@ -530,8 +793,9 @@ reclassify: #ifdef CONFIG_NET_CLS_ACT reset: if (unlikely(limit++ >= max_reclassify_loop)) { - net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", - tp->q->ops->id, tp->prio & 0xffff, + net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n", + tp->chain->block->index, + tp->prio & 0xffff, ntohs(tp->protocol)); return TC_ACT_SHOT; } @@ -604,8 +868,9 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, } static int tcf_fill_node(struct net *net, struct sk_buff *skb, - struct tcf_proto *tp, struct Qdisc *q, u32 parent, - void *fh, u32 portid, u32 seq, u16 flags, int event) + struct tcf_proto *tp, struct tcf_block *block, + struct Qdisc *q, u32 parent, void *fh, + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -618,8 +883,13 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = qdisc_dev(q)->ifindex; - tcm->tcm_parent = parent; + if (q) { + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; + tcm->tcm_parent = parent; + } else { + tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; + tcm->tcm_block_index = block->index; + } tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) goto nla_put_failure; @@ -642,8 +912,8 @@ nla_put_failure: static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, - struct Qdisc *q, u32 parent, - void *fh, int event, bool unicast) + struct tcf_block *block, struct Qdisc *q, + u32 parent, void *fh, int event, bool unicast) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -652,8 +922,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, - n->nlmsg_flags, event) <= 0) { + if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, + n->nlmsg_seq, n->nlmsg_flags, event) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -667,8 +937,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, - struct Qdisc *q, u32 parent, - void *fh, bool unicast, bool *last) + struct tcf_block *block, struct Qdisc *q, + u32 parent, void *fh, bool unicast, bool *last, + struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -678,13 +949,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, - n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, + n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + NL_SET_ERR_MSG(extack, "Failed to build del event notification"); kfree_skb(skb); return -EINVAL; } - err = tp->ops->delete(tp, fh, last); + err = tp->ops->delete(tp, fh, last, extack); if (err) { kfree_skb(skb); return err; @@ -693,20 +965,24 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, if (unicast) return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + if (err < 0) + NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); + return err; } static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, - struct Qdisc *q, u32 parent, - struct nlmsghdr *n, + struct tcf_block *block, struct Qdisc *q, + u32 parent, struct nlmsghdr *n, struct tcf_chain *chain, int event) { struct tcf_proto *tp; for (tp = rtnl_dereference(chain->filter_chain); tp; tp = rtnl_dereference(tp->next)) - tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false); + tfilter_notify(net, oskb, n, tp, block, + q, parent, 0, event, false); } /* Add/change/delete/get a filter node */ @@ -722,13 +998,11 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool prio_allocate; u32 parent; u32 chain_index; - struct net_device *dev; - struct Qdisc *q; + struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; struct tcf_block *block; struct tcf_proto *tp; - const struct Qdisc_class_ops *cops; unsigned long cl; void *fh; int err; @@ -755,8 +1029,10 @@ replay: if (prio == 0) { switch (n->nlmsg_type) { case RTM_DELTFILTER: - if (protocol || t->tcm_handle || tca[TCA_KIND]) + if (protocol || t->tcm_handle || tca[TCA_KIND]) { + NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set"); return -ENOENT; + } break; case RTM_NEWTFILTER: /* If no priority is provided by the user, @@ -769,63 +1045,91 @@ replay: } /* fall-through */ default: + NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); return -ENOENT; } } /* Find head of filter chain. */ - /* Find link */ - dev = __dev_get_by_index(net, t->tcm_ifindex); - if (dev == NULL) - return -ENODEV; - - /* Find qdisc */ - if (!parent) { - q = dev->qdisc; - parent = q->handle; + if (t->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + block = tcf_block_lookup(net, t->tcm_block_index); + if (!block) { + NL_SET_ERR_MSG(extack, "Block of given index was not found"); + err = -EINVAL; + goto errout; + } } else { - q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); - if (q == NULL) - return -EINVAL; - } + const struct Qdisc_class_ops *cops; + struct net_device *dev; - /* Is it classful? */ - cops = q->ops->cl_ops; - if (!cops) - return -EINVAL; + /* Find link */ + dev = __dev_get_by_index(net, t->tcm_ifindex); + if (!dev) + return -ENODEV; - if (!cops->tcf_block) - return -EOPNOTSUPP; + /* Find qdisc */ + if (!parent) { + q = dev->qdisc; + parent = q->handle; + } else { + q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); + if (!q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + return -EINVAL; + } + } - /* Do we search for filter, attached to class? */ - if (TC_H_MIN(parent)) { - cl = cops->find(q, parent); - if (cl == 0) - return -ENOENT; - } + /* Is it classful? */ + cops = q->ops->cl_ops; + if (!cops) { + NL_SET_ERR_MSG(extack, "Qdisc not classful"); + return -EINVAL; + } - /* And the last stroke */ - block = cops->tcf_block(q, cl); - if (!block) { - err = -EINVAL; - goto errout; + if (!cops->tcf_block) { + NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); + return -EOPNOTSUPP; + } + + /* Do we search for filter, attached to class? */ + if (TC_H_MIN(parent)) { + cl = cops->find(q, parent); + if (cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); + return -ENOENT; + } + } + + /* And the last stroke */ + block = cops->tcf_block(q, cl, extack); + if (!block) { + err = -EINVAL; + goto errout; + } + if (tcf_block_shared(block)) { + NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); + err = -EOPNOTSUPP; + goto errout; + } } chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { + NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout; } chain = tcf_chain_get(block, chain_index, n->nlmsg_type == RTM_NEWTFILTER); if (!chain) { + NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL; goto errout; } if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { - tfilter_notify_chain(net, skb, q, parent, n, + tfilter_notify_chain(net, skb, block, q, parent, n, chain, RTM_DELTFILTER); tcf_chain_flush(chain); err = 0; @@ -835,6 +1139,7 @@ replay: tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, prio_allocate); if (IS_ERR(tp)) { + NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = PTR_ERR(tp); goto errout; } @@ -843,12 +1148,14 @@ replay: /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) { + NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); err = -EINVAL; goto errout; } if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags & NLM_F_CREATE)) { + NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout; } @@ -857,13 +1164,14 @@ replay: prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info)); tp = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, prio, parent, q, chain); + protocol, prio, chain, extack); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout; } tp_created = 1; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout; } @@ -873,15 +1181,16 @@ replay: if (!fh) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { tcf_chain_tp_remove(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, q, parent, fh, + tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_DELTFILTER, false); - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, extack); err = 0; goto errout; } if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags & NLM_F_CREATE)) { + NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout; } @@ -892,41 +1201,47 @@ replay: case RTM_NEWTFILTER: if (n->nlmsg_flags & NLM_F_EXCL) { if (tp_created) - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, NULL); + NL_SET_ERR_MSG(extack, "Filter already exists"); err = -EEXIST; goto errout; } break; case RTM_DELTFILTER: - err = tfilter_del_notify(net, skb, n, tp, q, parent, - fh, false, &last); + err = tfilter_del_notify(net, skb, n, tp, block, + q, parent, fh, false, &last, + extack); if (err) goto errout; if (last) { tcf_chain_tp_remove(chain, &chain_info, tp); - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, extack); } goto errout; case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, q, parent, fh, - RTM_NEWTFILTER, true); + err = tfilter_notify(net, skb, n, tp, block, q, parent, + fh, RTM_NEWTFILTER, true); + if (err < 0) + NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); goto errout; default: + NL_SET_ERR_MSG(extack, "Invalid netlink message type"); err = -EINVAL; goto errout; } } err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, - n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE); + n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, + extack); if (err == 0) { if (tp_created) tcf_chain_tp_insert(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, q, parent, fh, + tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false); } else { if (tp_created) - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, NULL); } errout: @@ -942,6 +1257,7 @@ struct tcf_dump_args { struct tcf_walker w; struct sk_buff *skb; struct netlink_callback *cb; + struct tcf_block *block; struct Qdisc *q; u32 parent; }; @@ -951,7 +1267,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) struct tcf_dump_args *a = (void *)arg; struct net *net = sock_net(a->skb->sk); - return tcf_fill_node(net, a->skb, tp, a->q, a->parent, + return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); @@ -962,6 +1278,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, long index_start, long *p_index) { struct net *net = sock_net(skb->sk); + struct tcf_block *block = chain->block; struct tcmsg *tcm = nlmsg_data(cb->nlh); struct tcf_dump_args arg; struct tcf_proto *tp; @@ -980,7 +1297,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(net, skb, tp, q, parent, 0, + if (tcf_fill_node(net, skb, tp, block, q, parent, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) @@ -993,6 +1310,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, arg.w.fn = tcf_node_dump; arg.skb = skb; arg.cb = cb; + arg.block = block; arg.q = q; arg.parent = parent; arg.w.stop = 0; @@ -1011,13 +1329,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; - struct Qdisc *q; + struct Qdisc *q = NULL; struct tcf_block *block; struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); - unsigned long cl = 0; - const struct Qdisc_class_ops *cops; long index_start; long index; u32 parent; @@ -1030,32 +1345,51 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return skb->len; - - parent = tcm->tcm_parent; - if (!parent) { - q = dev->qdisc; - parent = q->handle; + if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + block = tcf_block_lookup(net, tcm->tcm_block_index); + if (!block) + goto out; + /* If we work with block index, q is NULL and parent value + * will never be used in the following code. The check + * in tcf_fill_node prevents it. However, compiler does not + * see that far, so set parent to zero to silence the warning + * about parent being uninitialized. + */ + parent = 0; } else { - q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); - } - if (!q) - goto out; - cops = q->ops->cl_ops; - if (!cops) - goto out; - if (!cops->tcf_block) - goto out; - if (TC_H_MIN(tcm->tcm_parent)) { - cl = cops->find(q, tcm->tcm_parent); - if (cl == 0) + const struct Qdisc_class_ops *cops; + struct net_device *dev; + unsigned long cl = 0; + + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return skb->len; + + parent = tcm->tcm_parent; + if (!parent) { + q = dev->qdisc; + parent = q->handle; + } else { + q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); + } + if (!q) + goto out; + cops = q->ops->cl_ops; + if (!cops) + goto out; + if (!cops->tcf_block) + goto out; + if (TC_H_MIN(tcm->tcm_parent)) { + cl = cops->find(q, tcm->tcm_parent); + if (cl == 0) + goto out; + } + block = cops->tcf_block(q, cl, NULL); + if (!block) goto out; + if (tcf_block_shared(block)) + q = NULL; } - block = cops->tcf_block(q, cl); - if (!block) - goto out; index_start = cb->args[0]; index = 0; @@ -1090,7 +1424,8 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, + struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -1123,8 +1458,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } #else if ((exts->action && tb[exts->action]) || - (exts->police && tb[exts->police])) + (exts->police && tb[exts->police])) { + NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)"); return -EOPNOTSUPP; + } #endif return 0; @@ -1258,18 +1595,50 @@ int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, } EXPORT_SYMBOL(tc_setup_cb_call); +static __net_init int tcf_net_init(struct net *net) +{ + struct tcf_net *tn = net_generic(net, tcf_net_id); + + idr_init(&tn->idr); + return 0; +} + +static void __net_exit tcf_net_exit(struct net *net) +{ + struct tcf_net *tn = net_generic(net, tcf_net_id); + + idr_destroy(&tn->idr); +} + +static struct pernet_operations tcf_net_ops = { + .init = tcf_net_init, + .exit = tcf_net_exit, + .id = &tcf_net_id, + .size = sizeof(struct tcf_net), +}; + static int __init tc_filter_init(void) { + int err; + tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0); if (!tc_filter_wq) return -ENOMEM; + err = register_pernet_subsys(&tcf_net_ops); + if (err) + goto err_register_pernet_subsys; + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, tc_dump_tfilter, 0); return 0; + +err_register_pernet_subsys: + destroy_workqueue(tc_filter_wq); + return err; } subsys_initcall(tc_filter_init); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 5f169ded347e..d333f5c5101d 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -112,7 +112,7 @@ static void basic_delete_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void basic_destroy(struct tcf_proto *tp) +static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; @@ -130,7 +130,8 @@ static void basic_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) +static int basic_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f = arg; @@ -152,11 +153,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); if (err < 0) return err; @@ -175,7 +177,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr, + struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -221,7 +224,8 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, fnew->handle = idr_index; } - err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); + err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr, + extack); if (err < 0) { if (!fold) idr_remove_ext(&head->handle_idr, fnew->handle); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index a62586e2dbdb..8e5326bc6440 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -147,7 +147,8 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) } static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, - struct cls_bpf_prog *oldprog) + struct cls_bpf_prog *oldprog, + struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_bpf_offload cls_bpf = {}; @@ -158,22 +159,25 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, skip_sw = prog && tc_skip_sw(prog->gen_flags); obj = prog ?: oldprog; - tc_cls_common_offload_init(&cls_bpf.common, tp); + tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, + extack); cls_bpf.command = TC_CLSBPF_OFFLOAD; cls_bpf.exts = &obj->exts; cls_bpf.prog = prog ? prog->filter : NULL; cls_bpf.oldprog = oldprog ? oldprog->filter : NULL; cls_bpf.name = obj->bpf_name; cls_bpf.exts_integrated = obj->exts_integrated; - cls_bpf.gen_flags = obj->gen_flags; + + if (oldprog) + tcf_block_offload_dec(block, &oldprog->gen_flags); err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); if (prog) { if (err < 0) { - cls_bpf_offload_cmd(tp, oldprog, prog); + cls_bpf_offload_cmd(tp, oldprog, prog, extack); return err; } else if (err > 0) { - prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + tcf_block_offload_inc(block, &prog->gen_flags); } } @@ -189,7 +193,8 @@ static u32 cls_bpf_flags(u32 flags) } static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, - struct cls_bpf_prog *oldprog) + struct cls_bpf_prog *oldprog, + struct netlink_ext_ack *extack) { if (prog && oldprog && cls_bpf_flags(prog->gen_flags) != @@ -203,15 +208,16 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, if (!prog && !oldprog) return 0; - return cls_bpf_offload_cmd(tp, prog, oldprog); + return cls_bpf_offload_cmd(tp, prog, oldprog, extack); } static void cls_bpf_stop_offload(struct tcf_proto *tp, - struct cls_bpf_prog *prog) + struct cls_bpf_prog *prog, + struct netlink_ext_ack *extack) { int err; - err = cls_bpf_offload_cmd(tp, NULL, prog); + err = cls_bpf_offload_cmd(tp, NULL, prog, extack); if (err) pr_err("Stopping hardware offload failed: %d\n", err); } @@ -222,13 +228,12 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, struct tcf_block *block = tp->chain->block; struct tc_cls_bpf_offload cls_bpf = {}; - tc_cls_common_offload_init(&cls_bpf.common, tp); + tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, NULL); cls_bpf.command = TC_CLSBPF_STATS; cls_bpf.exts = &prog->exts; cls_bpf.prog = prog->filter; cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - cls_bpf.gen_flags = prog->gen_flags; tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false); } @@ -285,12 +290,13 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) tcf_queue_work(&prog->work); } -static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) +static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); idr_remove_ext(&head->handle_idr, prog->handle); - cls_bpf_stop_offload(tp, prog); + cls_bpf_stop_offload(tp, prog, extack); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); if (tcf_exts_get_net(&prog->exts)) @@ -299,22 +305,24 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) __cls_bpf_delete_prog(prog); } -static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) +static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); - __cls_bpf_delete(tp, arg); + __cls_bpf_delete(tp, arg, extack); *last = list_empty(&head->plist); return 0; } -static void cls_bpf_destroy(struct tcf_proto *tp) +static void cls_bpf_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog, *tmp; list_for_each_entry_safe(prog, tmp, &head->plist, link) - __cls_bpf_delete(tp, prog); + __cls_bpf_delete(tp, prog, extack); idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); @@ -399,15 +407,16 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, prog->bpf_name = name; prog->filter = fp; - if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS)) - netif_keep_dst(qdisc_dev(tp->q)); + if (fp->dst_needed) + tcf_block_netif_keep_dst(tp->chain->block); return 0; } static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, struct cls_bpf_prog *prog, unsigned long base, - struct nlattr **tb, struct nlattr *est, bool ovr) + struct nlattr **tb, struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { bool is_bpf, is_ebpf, have_exts = false; u32 gen_flags = 0; @@ -418,7 +427,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr); + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack); if (ret < 0) return ret; @@ -456,7 +465,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *oldprog = *arg; @@ -504,11 +513,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, prog->handle = handle; } - ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); + ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr, + extack); if (ret < 0) goto errout_idr; - ret = cls_bpf_offload(tp, prog, oldprog); + ret = cls_bpf_offload(tp, prog, oldprog, extack); if (ret) goto errout_parms; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 309d5899265f..762da5c0cf5e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -91,7 +91,8 @@ static void cls_cgroup_destroy_rcu(struct rcu_head *root) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -121,7 +122,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr, + extack); if (err < 0) goto errout; @@ -141,7 +143,8 @@ errout: return err; } -static void cls_cgroup_destroy(struct tcf_proto *tp) +static void cls_cgroup_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -154,7 +157,8 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) } } -static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) +static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 25c2a888e1f0..cd5fe383afdd 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -401,7 +401,7 @@ static void flow_destroy_filter(struct rcu_head *head) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; @@ -454,7 +454,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto err2; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr, + extack); if (err < 0) goto err2; @@ -526,7 +527,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE); - netif_keep_dst(qdisc_dev(tp->q)); + tcf_block_netif_keep_dst(tp->chain->block); if (tb[TCA_FLOW_KEYS]) { fnew->keymask = keymask; @@ -574,7 +575,8 @@ err1: return err; } -static int flow_delete(struct tcf_proto *tp, void *arg, bool *last) +static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = arg; @@ -598,7 +600,7 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static void flow_destroy(struct tcf_proto *tp) +static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 543a3e875d05..dc9acaafc0a8 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -166,6 +166,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, * so do it rather here. */ skb_key.basic.n_proto = skb->protocol; + skb_flow_dissect_tunnel_info(skb, &head->dissector, &skb_key); skb_flow_dissect(skb, &head->dissector, &skb_key, 0); fl_set_masked_key(&skb_mkey, &skb_key, &head->mask); @@ -217,30 +218,33 @@ static void fl_destroy_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) +static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, + struct netlink_ext_ack *extack) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_flower.common, tp); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, false); + tcf_block_offload_dec(block, &f->flags); } static int fl_hw_replace_filter(struct tcf_proto *tp, struct flow_dissector *dissector, struct fl_flow_key *mask, - struct cls_fl_filter *f) + struct cls_fl_filter *f, + struct netlink_ext_ack *extack) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(f->flags); int err; - tc_cls_common_offload_init(&cls_flower.common, tp); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; cls_flower.dissector = dissector; @@ -252,10 +256,10 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); if (err < 0) { - fl_hw_destroy_filter(tp, f); + fl_hw_destroy_filter(tp, f, NULL); return err; } else if (err > 0) { - f->flags |= TCA_CLS_FLAGS_IN_HW; + tcf_block_offload_inc(block, &f->flags); } if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) @@ -269,7 +273,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_flower.common, tp); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.exts = &f->exts; @@ -279,14 +283,15 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) &cls_flower, false); } -static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) +static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); idr_remove_ext(&head->handle_idr, f->handle); list_del_rcu(&f->list); if (!tc_skip_hw(f->flags)) - fl_hw_destroy_filter(tp, f); + fl_hw_destroy_filter(tp, f, extack); tcf_unbind_filter(tp, &f->res); if (tcf_exts_get_net(&f->exts)) call_rcu(&f->rcu, fl_destroy_filter); @@ -312,13 +317,13 @@ static void fl_destroy_rcu(struct rcu_head *rcu) schedule_work(&head->work); } -static void fl_destroy(struct tcf_proto *tp) +static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f, *next; list_for_each_entry_safe(f, next, &head->filters, list) - __fl_delete(tp, f); + __fl_delete(tp, f, extack); idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); @@ -524,13 +529,14 @@ static void fl_set_key_ip(struct nlattr **tb, } static int fl_set_key(struct net *net, struct nlattr **tb, - struct fl_flow_key *key, struct fl_flow_key *mask) + struct fl_flow_key *key, struct fl_flow_key *mask, + struct netlink_ext_ack *extack) { __be16 ethertype; int ret = 0; #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FLOWER_INDEV]) { - int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); + int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack); if (err < 0) return err; key->indev_ifindex = err; @@ -825,11 +831,12 @@ static int fl_check_assign_mask(struct cls_fl_head *head, static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); if (err < 0) return err; @@ -838,7 +845,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, tcf_bind_filter(tp, &f->res, base); } - err = fl_set_key(net, tb, &f->key, &mask->key); + err = fl_set_key(net, tb, &f->key, &mask->key, extack); if (err) return err; @@ -851,7 +858,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; @@ -914,7 +921,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } - err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); + err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr, + extack); if (err) goto errout_idr; @@ -938,7 +946,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, err = fl_hw_replace_filter(tp, &head->dissector, &mask.key, - fnew); + fnew, + extack); if (err) goto errout_idr; } @@ -951,7 +960,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, rhashtable_remove_fast(&head->ht, &fold->ht_node, head->ht_params); if (!tc_skip_hw(fold->flags)) - fl_hw_destroy_filter(tp, fold); + fl_hw_destroy_filter(tp, fold, NULL); } *arg = fnew; @@ -981,7 +990,8 @@ errout_tb: return err; } -static int fl_delete(struct tcf_proto *tp, void *arg, bool *last) +static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = arg; @@ -989,7 +999,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last) if (!tc_skip_sw(f->flags)) rhashtable_remove_fast(&head->ht, &f->ht_node, head->ht_params); - __fl_delete(tp, f); + __fl_delete(tp, f, extack); *last = list_empty(&head->filters); return 0; } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 20f0de1a960a..8b207723fbc2 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -149,7 +149,7 @@ static void fw_delete_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void fw_destroy(struct tcf_proto *tp) +static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; @@ -172,7 +172,8 @@ static void fw_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) +static int fw_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = arg; @@ -218,13 +219,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_set_parms(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, - struct nlattr **tca, unsigned long base, bool ovr) + struct nlattr **tca, unsigned long base, bool ovr, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); u32 mask; int err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr, + extack); if (err < 0) return err; @@ -236,7 +239,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FW_INDEV]) { int ret; - ret = tcf_change_indev(net, tb[TCA_FW_INDEV]); + ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack); if (ret < 0) return ret; f->ifindex = ret; @@ -257,7 +260,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, - bool ovr) + bool ovr, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; @@ -296,7 +299,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return err; } - err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr); + err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack); if (err < 0) { tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -345,7 +348,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, f->id = handle; f->tp = tp; - err = fw_set_parms(net, tp, f, tb, tca, base, ovr); + err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 66d4e0099158..2ba721a590a7 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -71,28 +71,31 @@ static void mall_destroy_rcu(struct rcu_head *rcu) static void mall_destroy_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, - unsigned long cookie) + unsigned long cookie, + struct netlink_ext_ack *extack) { struct tc_cls_matchall_offload cls_mall = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_mall.common, tp); + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false); + tcf_block_offload_dec(block, &head->flags); } static int mall_replace_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, - unsigned long cookie) + unsigned long cookie, + struct netlink_ext_ack *extack) { struct tc_cls_matchall_offload cls_mall = {}; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(head->flags); int err; - tc_cls_common_offload_init(&cls_mall.common, tp); + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.exts = &head->exts; cls_mall.cookie = cookie; @@ -100,10 +103,10 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); if (err < 0) { - mall_destroy_hw_filter(tp, head, cookie); + mall_destroy_hw_filter(tp, head, cookie, NULL); return err; } else if (err > 0) { - head->flags |= TCA_CLS_FLAGS_IN_HW; + tcf_block_offload_inc(block, &head->flags); } if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) @@ -112,7 +115,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return 0; } -static void mall_destroy(struct tcf_proto *tp) +static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -120,7 +123,7 @@ static void mall_destroy(struct tcf_proto *tp) return; if (!tc_skip_hw(head->flags)) - mall_destroy_hw_filter(tp, head, (unsigned long) head); + mall_destroy_hw_filter(tp, head, (unsigned long) head, extack); if (tcf_exts_get_net(&head->exts)) call_rcu(&head->rcu, mall_destroy_rcu); @@ -141,11 +144,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { static int mall_set_parms(struct net *net, struct tcf_proto *tp, struct cls_mall_head *head, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack); if (err < 0) return err; @@ -159,7 +163,7 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_MATCHALL_MAX + 1]; @@ -197,12 +201,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, new->handle = handle; new->flags = flags; - err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr); + err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr, + extack); if (err) goto err_set_parms; if (!tc_skip_hw(new->flags)) { - err = mall_replace_hw_filter(tp, new, (unsigned long) new); + err = mall_replace_hw_filter(tp, new, (unsigned long)new, + extack); if (err) goto err_replace_hw_filter; } @@ -222,7 +228,8 @@ err_exts_init: return err; } -static int mall_delete(struct tcf_proto *tp, void *arg, bool *last) +static int mall_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index ac9a5b8825b9..21a03a8ee029 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -281,7 +281,7 @@ static void route4_delete_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void route4_destroy(struct tcf_proto *tp) +static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; @@ -316,7 +316,8 @@ static void route4_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) +static int route4_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter *f = arg; @@ -389,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct route4_filter *f, u32 handle, struct route4_head *head, struct nlattr **tb, struct nlattr *est, int new, - bool ovr) + bool ovr, struct netlink_ext_ack *extack) { u32 id = 0, to = 0, nhandle = 0x8000; struct route4_filter *fp; @@ -397,7 +398,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); if (err < 0) return err; @@ -471,7 +472,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -515,7 +517,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, } err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], new, ovr); + tca[TCA_RATE], new, ovr, extack); if (err < 0) goto errout; @@ -527,7 +529,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (f->handle < f1->handle) break; - netif_keep_dst(qdisc_dev(tp->q)); + tcf_block_netif_keep_dst(tp->chain->block); rcu_assign_pointer(f->next, f1); rcu_assign_pointer(*fp, f); diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index cf325625c99d..4f1297657c27 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -322,7 +322,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) __rsvp_delete_filter(f); } -static void rsvp_destroy(struct tcf_proto *tp) +static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; @@ -350,7 +350,8 @@ static void rsvp_destroy(struct tcf_proto *tp) kfree_rcu(data, rcu); } -static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last) +static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_filter *nfp, *f = arg; @@ -486,7 +487,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); struct rsvp_filter *f, *nfp; @@ -511,7 +512,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack); if (err < 0) goto errout2; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 67467ae24c97..b49cc990a000 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -193,7 +193,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head) tcf_queue_work(&f->work); } -static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) +static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = arg; @@ -246,7 +247,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp, { bool last; - return tcindex_delete(tp, arg, &last); + return tcindex_delete(tp, arg, &last, NULL); } static void __tcindex_destroy(struct rcu_head *head) @@ -322,7 +323,7 @@ static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) { struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; @@ -334,7 +335,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack); if (err < 0) goto errout; @@ -520,7 +521,8 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr, + struct netlink_ext_ack *extack) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -540,7 +542,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, return err; return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE], ovr); + tca[TCA_RATE], ovr, extack); } static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) @@ -579,7 +581,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static void tcindex_destroy(struct tcf_proto *tp) +static void tcindex_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 507859cdd1cb..60c892c36a60 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -87,6 +87,7 @@ struct tc_u_hnode { unsigned int divisor; struct idr handle_idr; struct rcu_head rcu; + u32 flags; /* The 'ht' field MUST be the last field in structure to allow for * more entries allocated at end of structure. */ @@ -486,12 +487,13 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } -static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, + struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - tc_cls_common_offload_init(&cls_u32.common, tp); + tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack); cls_u32.command = TC_CLSU32_DELETE_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; @@ -501,7 +503,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, - u32 flags) + u32 flags, struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; @@ -509,7 +511,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, bool offloaded = false; int err; - tc_cls_common_offload_init(&cls_u32.common, tp); + tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); cls_u32.command = TC_CLSU32_NEW_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; @@ -517,7 +519,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { - u32_clear_hw_hnode(tp, h); + u32_clear_hw_hnode(tp, h, NULL); return err; } else if (err > 0) { offloaded = true; @@ -529,27 +531,29 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, return 0; } -static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) +static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, + struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - tc_cls_common_offload_init(&cls_u32.common, tp); + tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); cls_u32.command = TC_CLSU32_DELETE_KNODE; - cls_u32.knode.handle = handle; + cls_u32.knode.handle = n->handle; tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); + tcf_block_offload_dec(block, &n->flags); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, - u32 flags) + u32 flags, struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; bool skip_sw = tc_skip_sw(flags); int err; - tc_cls_common_offload_init(&cls_u32.common, tp); + tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); cls_u32.command = TC_CLSU32_REPLACE_KNODE; cls_u32.knode.handle = n->handle; cls_u32.knode.fshift = n->fshift; @@ -567,10 +571,10 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { - u32_remove_hw_knode(tp, n->handle); + u32_remove_hw_knode(tp, n, NULL); return err; } else if (err > 0) { - n->flags |= TCA_CLS_FLAGS_IN_HW; + tcf_block_offload_inc(block, &n->flags); } if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) @@ -579,7 +583,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, return 0; } -static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) +static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, + struct netlink_ext_ack *extack) { struct tc_u_knode *n; unsigned int h; @@ -589,7 +594,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); - u32_remove_hw_knode(tp, n->handle); + u32_remove_hw_knode(tp, n, extack); idr_remove_ext(&ht->handle_idr, n->handle); if (tcf_exts_get_net(&n->exts)) call_rcu(&n->rcu, u32_delete_key_freepf_rcu); @@ -599,7 +604,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) } } -static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) +static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, + struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode __rcu **hn; @@ -607,14 +613,14 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) WARN_ON(ht->refcnt); - u32_clear_hnode(tp, ht); + u32_clear_hnode(tp, ht, extack); hn = &tp_c->hlist; for (phn = rtnl_dereference(*hn); phn; hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { - u32_clear_hw_hnode(tp, ht); + u32_clear_hw_hnode(tp, ht, extack); idr_destroy(&ht->handle_idr); idr_remove_ext(&tp_c->handle_idr, ht->handle); RCU_INIT_POINTER(*hn, ht->next); @@ -637,7 +643,7 @@ static bool ht_empty(struct tc_u_hnode *ht) return true; } -static void u32_destroy(struct tcf_proto *tp) +static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); @@ -645,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp) WARN_ON(root_ht == NULL); if (root_ht && --root_ht->refcnt == 0) - u32_destroy_hnode(tp, root_ht); + u32_destroy_hnode(tp, root_ht, extack); if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; @@ -656,7 +662,7 @@ static void u32_destroy(struct tcf_proto *tp) ht; ht = rtnl_dereference(ht->next)) { ht->refcnt--; - u32_clear_hnode(tp, ht); + u32_clear_hnode(tp, ht, extack); } while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { @@ -671,7 +677,8 @@ static void u32_destroy(struct tcf_proto *tp) tp->data = NULL; } -static int u32_delete(struct tcf_proto *tp, void *arg, bool *last) +static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = arg; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); @@ -682,18 +689,21 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last) goto out; if (TC_U32_KEY(ht->handle)) { - u32_remove_hw_knode(tp, ht->handle); + u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack); ret = u32_delete_key(tp, (struct tc_u_knode *)ht); goto out; } - if (root_ht == ht) + if (root_ht == ht) { + NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node"); return -EINVAL; + } if (ht->refcnt == 1) { ht->refcnt--; - u32_destroy_hnode(tp, ht); + u32_destroy_hnode(tp, ht, extack); } else { + NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); return -EBUSY; } @@ -764,11 +774,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_hnode *ht, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack); if (err < 0) return err; @@ -776,14 +787,18 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; - if (TC_U32_KEY(handle)) + if (TC_U32_KEY(handle)) { + NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table"); return -EINVAL; + } if (handle) { ht_down = u32_lookup_ht(ht->tp_c, handle); - if (ht_down == NULL) + if (!ht_down) { + NL_SET_ERR_MSG_MOD(extack, "Link hash table not found"); return -EINVAL; + } ht_down->refcnt++; } @@ -801,7 +816,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_U32_INDEV]) { int ret; - ret = tcf_change_indev(net, tb[TCA_U32_INDEV]); + ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); if (ret < 0) return -EINVAL; n->ifindex = ret; @@ -892,7 +907,8 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr, + struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -906,28 +922,40 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, size_t size; #endif - if (opt == NULL) - return handle ? -EINVAL : 0; + if (!opt) { + if (handle) { + NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options"); + return -EINVAL; + } else { + return 0; + } + } - err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL); + err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack); if (err < 0) return err; if (tb[TCA_U32_FLAGS]) { flags = nla_get_u32(tb[TCA_U32_FLAGS]); - if (!tc_flags_valid(flags)) + if (!tc_flags_valid(flags)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags"); return -EINVAL; + } } n = *arg; if (n) { struct tc_u_knode *new; - if (TC_U32_KEY(n->handle) == 0) + if (TC_U32_KEY(n->handle) == 0) { + NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero"); return -EINVAL; + } - if (n->flags != flags) + if (n->flags != flags) { + NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags"); return -EINVAL; + } new = u32_init_knode(tp, n); if (!new) @@ -935,14 +963,14 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, err = u32_set_parms(net, tp, base, rtnl_dereference(n->ht_up), new, tb, - tca[TCA_RATE], ovr); + tca[TCA_RATE], ovr, extack); if (err) { u32_destroy_key(tp, new, false); return err; } - err = u32_replace_hw_knode(tp, new, flags); + err = u32_replace_hw_knode(tp, new, flags, extack); if (err) { u32_destroy_key(tp, new, false); return err; @@ -961,10 +989,14 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_U32_DIVISOR]) { unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); - if (--divisor > 0x100) + if (--divisor > 0x100) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets"); return -EINVAL; - if (TC_U32_KEY(handle)) + } + if (TC_U32_KEY(handle)) { + NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table"); return -EINVAL; + } ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); if (ht == NULL) return -ENOBUFS; @@ -988,8 +1020,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->handle = handle; ht->prio = tp->prio; idr_init(&ht->handle_idr); + ht->flags = flags; - err = u32_replace_hw_hnode(tp, ht, flags); + err = u32_replace_hw_hnode(tp, ht, flags, extack); if (err) { idr_remove_ext(&tp_c->handle_idr, handle); kfree(ht); @@ -1010,20 +1043,26 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, htid = ht->handle; } else { ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid)); - if (ht == NULL) + if (!ht) { + NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found"); return -EINVAL; + } } } else { ht = rtnl_dereference(tp->root); htid = ht->handle; } - if (ht->divisor < TC_U32_HASH(htid)) + if (ht->divisor < TC_U32_HASH(htid)) { + NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value"); return -EINVAL; + } if (handle) { - if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) + if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { + NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); return -EINVAL; + } handle = htid | TC_U32_NODE(handle); err = idr_alloc_ext(&ht->handle_idr, NULL, NULL, handle, handle + 1, @@ -1034,6 +1073,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, handle = gen_new_kid(ht, htid); if (tb[TCA_U32_SEL] == NULL) { + NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); err = -EINVAL; goto erridr; } @@ -1082,12 +1122,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr); + err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr, + extack); if (err == 0) { struct tc_u_knode __rcu **ins; struct tc_u_knode *pins; - err = u32_replace_hw_knode(tp, n, flags); + err = u32_replace_hw_knode(tp, n, flags, extack); if (err) goto errhw; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 52529b7f8d96..d512f49ee83c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -393,13 +393,16 @@ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, - struct nlattr *tab) + struct nlattr *tab, + struct netlink_ext_ack *extack) { struct qdisc_rate_table *rtab; if (tab == NULL || r->rate == 0 || r->cell_log == 0 || - nla_len(tab) != TC_RTAB_SIZE) + nla_len(tab) != TC_RTAB_SIZE) { + NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching"); return NULL; + } for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) && @@ -418,6 +421,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; + } else { + NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table"); } return rtab; } @@ -449,7 +454,8 @@ static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { [TCA_STAB_DATA] = { .type = NLA_BINARY }, }; -static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) +static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_STAB_MAX + 1]; struct qdisc_size_table *stab; @@ -458,23 +464,29 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) u16 *tab = NULL; int err; - err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL); + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack); if (err < 0) return ERR_PTR(err); - if (!tb[TCA_STAB_BASE]) + if (!tb[TCA_STAB_BASE]) { + NL_SET_ERR_MSG(extack, "Size table base attribute is missing"); return ERR_PTR(-EINVAL); + } s = nla_data(tb[TCA_STAB_BASE]); if (s->tsize > 0) { - if (!tb[TCA_STAB_DATA]) + if (!tb[TCA_STAB_DATA]) { + NL_SET_ERR_MSG(extack, "Size table data attribute is missing"); return ERR_PTR(-EINVAL); + } tab = nla_data(tb[TCA_STAB_DATA]); tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); } - if (tsize != s->tsize || (!tab && tsize > 0)) + if (tsize != s->tsize || (!tab && tsize > 0)) { + NL_SET_ERR_MSG(extack, "Invalid size of size table"); return ERR_PTR(-EINVAL); + } list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) @@ -669,7 +681,7 @@ int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) unsigned int size = 4; clhash->hash = qdisc_class_hash_alloc(size); - if (clhash->hash == NULL) + if (!clhash->hash) return -ENOMEM; clhash->hashsize = size; clhash->hashmask = size - 1; @@ -779,6 +791,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; struct qdisc_size_table *stab; + u32 block_index; __u32 qlen; cond_resched(); @@ -795,11 +808,23 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, tcm->tcm_info = refcount_read(&q->refcnt); if (nla_put_string(skb, TCA_KIND, q->ops->id)) goto nla_put_failure; - if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) - goto nla_put_failure; + if (q->ops->ingress_block_get) { + block_index = q->ops->ingress_block_get(q); + if (block_index && + nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index)) + goto nla_put_failure; + } + if (q->ops->egress_block_get) { + block_index = q->ops->egress_block_get(q); + if (block_index && + nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index)) + goto nla_put_failure; + } if (q->ops->dump && q->ops->dump(q, skb) < 0) goto nla_put_failure; - qlen = q->q.qlen; + if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) + goto nla_put_failure; + qlen = qdisc_qlen_sum(q); stab = rtnl_dereference(q->stab); if (stab && qdisc_dump_stab(skb, stab) < 0) @@ -898,7 +923,8 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb, static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, struct sk_buff *skb, struct nlmsghdr *n, u32 classid, - struct Qdisc *new, struct Qdisc *old) + struct Qdisc *new, struct Qdisc *old, + struct netlink_ext_ack *extack) { struct Qdisc *q = old; struct net *net = dev_net(dev); @@ -913,8 +939,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, (new && new->flags & TCQ_F_INGRESS)) { num_q = 1; ingress = 1; - if (!dev_ingress_queue(dev)) + if (!dev_ingress_queue(dev)) { + NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); return -ENOENT; + } } if (dev->flags & IFF_UP) @@ -956,14 +984,22 @@ skip: } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; + /* Only support running class lockless if parent is lockless */ + if (new && (new->flags & TCQ_F_NOLOCK) && + parent && !(parent->flags & TCQ_F_NOLOCK)) + new->flags &= ~TCQ_F_NOLOCK; + err = -EOPNOTSUPP; if (cops && cops->graft) { unsigned long cl = cops->find(parent, classid); - if (cl) - err = cops->graft(parent, cl, new, &old); - else + if (cl) { + err = cops->graft(parent, cl, new, &old, + extack); + } else { + NL_SET_ERR_MSG(extack, "Specified class not found"); err = -ENOENT; + } } if (!err) notify_and_destroy(net, skb, n, classid, old, new); @@ -971,6 +1007,40 @@ skip: return err; } +static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, + struct netlink_ext_ack *extack) +{ + u32 block_index; + + if (tca[TCA_INGRESS_BLOCK]) { + block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]); + + if (!block_index) { + NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0"); + return -EINVAL; + } + if (!sch->ops->ingress_block_set) { + NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported"); + return -EOPNOTSUPP; + } + sch->ops->ingress_block_set(sch, block_index); + } + if (tca[TCA_EGRESS_BLOCK]) { + block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]); + + if (!block_index) { + NL_SET_ERR_MSG(extack, "Egress block index cannot be 0"); + return -EINVAL; + } + if (!sch->ops->egress_block_set) { + NL_SET_ERR_MSG(extack, "Egress block sharing is not supported"); + return -EOPNOTSUPP; + } + sch->ops->egress_block_set(sch, block_index); + } + return 0; +} + /* lockdep annotation is needed for ingress; egress gets it only for name */ static struct lock_class_key qdisc_tx_lock; static struct lock_class_key qdisc_rx_lock; @@ -984,7 +1054,8 @@ static struct lock_class_key qdisc_rx_lock; static struct Qdisc *qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, struct Qdisc *p, u32 parent, u32 handle, - struct nlattr **tca, int *errp) + struct nlattr **tca, int *errp, + struct netlink_ext_ack *extack) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -1022,10 +1093,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev, #endif err = -ENOENT; - if (ops == NULL) + if (!ops) { + NL_SET_ERR_MSG(extack, "Specified qdisc not found"); goto err_out; + } - sch = qdisc_alloc(dev_queue, ops); + sch = qdisc_alloc(dev_queue, ops, extack); if (IS_ERR(sch)) { err = PTR_ERR(sch); goto err_out2; @@ -1062,43 +1135,57 @@ static struct Qdisc *qdisc_create(struct net_device *dev, netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); } - if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { - if (tca[TCA_STAB]) { - stab = qdisc_get_stab(tca[TCA_STAB]); - if (IS_ERR(stab)) { - err = PTR_ERR(stab); - goto err_out4; - } - rcu_assign_pointer(sch->stab, stab); - } - if (tca[TCA_RATE]) { - seqcount_t *running; - - err = -EOPNOTSUPP; - if (sch->flags & TCQ_F_MQROOT) - goto err_out4; - - if ((sch->parent != TC_H_ROOT) && - !(sch->flags & TCQ_F_INGRESS) && - (!p || !(p->flags & TCQ_F_MQROOT))) - running = qdisc_root_sleeping_running(sch); - else - running = &sch->running; - - err = gen_new_estimator(&sch->bstats, - sch->cpu_bstats, - &sch->rate_est, - NULL, - running, - tca[TCA_RATE]); - if (err) - goto err_out4; + err = qdisc_block_indexes_set(sch, tca, extack); + if (err) + goto err_out3; + + if (ops->init) { + err = ops->init(sch, tca[TCA_OPTIONS], extack); + if (err != 0) + goto err_out5; + } + + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB], extack); + if (IS_ERR(stab)) { + err = PTR_ERR(stab); + goto err_out4; } + rcu_assign_pointer(sch->stab, stab); + } + if (tca[TCA_RATE]) { + seqcount_t *running; - qdisc_hash_add(sch, false); + err = -EOPNOTSUPP; + if (sch->flags & TCQ_F_MQROOT) { + NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); + goto err_out4; + } - return sch; + if (sch->parent != TC_H_ROOT && + !(sch->flags & TCQ_F_INGRESS) && + (!p || !(p->flags & TCQ_F_MQROOT))) + running = qdisc_root_sleeping_running(sch); + else + running = &sch->running; + + err = gen_new_estimator(&sch->bstats, + sch->cpu_bstats, + &sch->rate_est, + NULL, + running, + tca[TCA_RATE]); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); + goto err_out4; + } } + + qdisc_hash_add(sch, false); + + return sch; + +err_out5: /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ if (ops->destroy) ops->destroy(sch); @@ -1122,21 +1209,28 @@ err_out4: goto err_out3; } -static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) +static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, + struct netlink_ext_ack *extack) { struct qdisc_size_table *ostab, *stab = NULL; int err = 0; if (tca[TCA_OPTIONS]) { - if (sch->ops->change == NULL) + if (!sch->ops->change) { + NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc"); return -EINVAL; - err = sch->ops->change(sch, tca[TCA_OPTIONS]); + } + if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { + NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); + return -EOPNOTSUPP; + } + err = sch->ops->change(sch, tca[TCA_OPTIONS], extack); if (err) return err; } if (tca[TCA_STAB]) { - stab = qdisc_get_stab(tca[TCA_STAB]); + stab = qdisc_get_stab(tca[TCA_STAB], extack); if (IS_ERR(stab)) return PTR_ERR(stab); } @@ -1234,8 +1328,10 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, if (clid != TC_H_ROOT) { if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { p = qdisc_lookup(dev, TC_H_MAJ(clid)); - if (!p) + if (!p) { + NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); return -ENOENT; + } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue(dev)) { q = dev_ingress_queue(dev)->qdisc_sleeping; @@ -1243,26 +1339,38 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, } else { q = dev->qdisc; } - if (!q) + if (!q) { + NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); return -ENOENT; + } - if (tcm->tcm_handle && q->handle != tcm->tcm_handle) + if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { + NL_SET_ERR_MSG(extack, "Invalid handle"); return -EINVAL; + } } else { q = qdisc_lookup(dev, tcm->tcm_handle); - if (!q) + if (!q) { + NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle"); return -ENOENT; + } } - if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) + if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { + NL_SET_ERR_MSG(extack, "Invalid qdisc name"); return -EINVAL; + } if (n->nlmsg_type == RTM_DELQDISC) { - if (!clid) + if (!clid) { + NL_SET_ERR_MSG(extack, "Classid cannot be zero"); return -EINVAL; - if (q->handle == 0) + } + if (q->handle == 0) { + NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero"); return -ENOENT; - err = qdisc_graft(dev, p, skb, n, clid, NULL, q); + } + err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack); if (err != 0) return err; } else { @@ -1308,8 +1416,10 @@ replay: if (clid != TC_H_ROOT) { if (clid != TC_H_INGRESS) { p = qdisc_lookup(dev, TC_H_MAJ(clid)); - if (!p) + if (!p) { + NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; + } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue_create(dev)) { q = dev_ingress_queue(dev)->qdisc_sleeping; @@ -1324,20 +1434,31 @@ replay: if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { if (tcm->tcm_handle) { - if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) + if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) { + NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override"); return -EEXIST; - if (TC_H_MIN(tcm->tcm_handle)) + } + if (TC_H_MIN(tcm->tcm_handle)) { + NL_SET_ERR_MSG(extack, "Invalid minor handle"); return -EINVAL; + } q = qdisc_lookup(dev, tcm->tcm_handle); if (!q) goto create_n_graft; - if (n->nlmsg_flags & NLM_F_EXCL) + if (n->nlmsg_flags & NLM_F_EXCL) { + NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override"); return -EEXIST; - if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) + } + if (tca[TCA_KIND] && + nla_strcmp(tca[TCA_KIND], q->ops->id)) { + NL_SET_ERR_MSG(extack, "Invalid qdisc name"); return -EINVAL; + } if (q == p || - (p && check_loop(q, p, 0))) + (p && check_loop(q, p, 0))) { + NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); return -ELOOP; + } qdisc_refcount_inc(q); goto graft; } else { @@ -1372,33 +1493,45 @@ replay: } } } else { - if (!tcm->tcm_handle) + if (!tcm->tcm_handle) { + NL_SET_ERR_MSG(extack, "Handle cannot be zero"); return -EINVAL; + } q = qdisc_lookup(dev, tcm->tcm_handle); } /* Change qdisc parameters */ - if (q == NULL) + if (!q) { + NL_SET_ERR_MSG(extack, "Specified qdisc not found"); return -ENOENT; - if (n->nlmsg_flags & NLM_F_EXCL) + } + if (n->nlmsg_flags & NLM_F_EXCL) { + NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify"); return -EEXIST; - if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) + } + if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { + NL_SET_ERR_MSG(extack, "Invalid qdisc name"); return -EINVAL; - err = qdisc_change(q, tca); + } + err = qdisc_change(q, tca, extack); if (err == 0) qdisc_notify(net, skb, n, clid, NULL, q); return err; create_n_graft: - if (!(n->nlmsg_flags & NLM_F_CREATE)) + if (!(n->nlmsg_flags & NLM_F_CREATE)) { + NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag"); return -ENOENT; + } if (clid == TC_H_INGRESS) { - if (dev_ingress_queue(dev)) + if (dev_ingress_queue(dev)) { q = qdisc_create(dev, dev_ingress_queue(dev), p, tcm->tcm_parent, tcm->tcm_parent, - tca, &err); - else + tca, &err, extack); + } else { + NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device"); err = -ENOENT; + } } else { struct netdev_queue *dev_queue; @@ -1411,7 +1544,7 @@ create_n_graft: q = qdisc_create(dev, dev_queue, p, tcm->tcm_parent, tcm->tcm_handle, - tca, &err); + tca, &err, extack); } if (q == NULL) { if (err == -EAGAIN) @@ -1420,7 +1553,7 @@ create_n_graft: } graft: - err = qdisc_graft(dev, p, skb, n, clid, q, NULL); + err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); if (err) { if (q) qdisc_destroy(q); @@ -1672,7 +1805,7 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, cl = cops->find(q, portid); if (!cl) return; - block = cops->tcf_block(q, cl); + block = cops->tcf_block(q, cl, NULL); if (!block) return; list_for_each_entry(chain, &block->chain_list, list) { @@ -1816,10 +1949,15 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, } } + if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { + NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes"); + return -EOPNOTSUPP; + } + new_cl = cl; err = -EOPNOTSUPP; if (cops->change) - err = cops->change(q, clid, portid, tca, &new_cl); + err = cops->change(q, clid, portid, tca, &new_cl, extack); if (err == 0) { tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); /* We just create a new class, need to do reverse binding. */ @@ -1955,7 +2093,6 @@ static int psched_open(struct inode *inode, struct file *file) } static const struct file_operations psched_fops = { - .owner = THIS_MODULE, .open = psched_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 2dbd249c0b2f..cd49afca9617 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -82,7 +82,8 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) } static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old) + struct Qdisc *new, struct Qdisc **old, + struct netlink_ext_ack *extack) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)arg; @@ -191,7 +192,8 @@ static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = { }; static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, + struct netlink_ext_ack *extack) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)*arg; @@ -281,13 +283,15 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } - error = tcf_block_get(&flow->block, &flow->filter_list, sch); + error = tcf_block_get(&flow->block, &flow->filter_list, sch, + extack); if (error) { kfree(flow); goto err_out; } - flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); + flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, + extack); if (!flow->q) flow->q = &noop_qdisc; pr_debug("atm_tc_change: qdisc %p\n", flow->q); @@ -356,7 +360,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)cl; @@ -531,7 +536,8 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch) return p->link.q->ops->peek(p->link.q); } -static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) +static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct atm_qdisc_data *p = qdisc_priv(sch); int err; @@ -541,12 +547,13 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) INIT_LIST_HEAD(&p->link.list); list_add(&p->link.list, &p->flows); p->link.q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, sch->handle); + &pfifo_qdisc_ops, sch->handle, extack); if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - err = tcf_block_get(&p->link.block, &p->link.filter_list, sch); + err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, + extack); if (err) return err; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 525eb3a6d625..f42025d53cfe 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1132,7 +1132,8 @@ static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = { [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, }; -static int cbq_init(struct Qdisc *sch, struct nlattr *opt) +static int cbq_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct cbq_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CBQ_MAX + 1]; @@ -1143,22 +1144,27 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); q->delay_timer.function = cbq_undelay; - if (!opt) + if (!opt) { + NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); return -EINVAL; + } - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack); if (err < 0) return err; - if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL) + if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) { + NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete"); return -EINVAL; + } r = nla_data(tb[TCA_CBQ_RATE]); - if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) + q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack); + if (!q->link.R_tab) return -EINVAL; - err = tcf_block_get(&q->link.block, &q->link.filter_list, sch); + err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack); if (err) goto put_rtab; @@ -1170,7 +1176,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.common.classid = sch->handle; q->link.qdisc = sch; q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle); + sch->handle, NULL); if (!q->link.q) q->link.q = &noop_qdisc; else @@ -1369,13 +1375,13 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, } static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct cbq_class *cl = (struct cbq_class *)arg; if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, cl->common.classid); + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + cl->common.classid, extack); if (new == NULL) return -ENOBUFS; } @@ -1450,7 +1456,7 @@ static void cbq_destroy(struct Qdisc *sch) static int cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, struct netlink_ext_ack *extack) { int err; struct cbq_sched_data *q = qdisc_priv(sch); @@ -1460,29 +1466,37 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t struct cbq_class *parent; struct qdisc_rate_table *rtab = NULL; - if (opt == NULL) + if (!opt) { + NL_SET_ERR_MSG(extack, "Mandatory qdisc options missing"); return -EINVAL; + } - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack); if (err < 0) return err; - if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) + if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) { + NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params"); return -EOPNOTSUPP; + } if (cl) { /* Check parent */ if (parentid) { if (cl->tparent && - cl->tparent->common.classid != parentid) + cl->tparent->common.classid != parentid) { + NL_SET_ERR_MSG(extack, "Invalid parent id"); return -EINVAL; - if (!cl->tparent && parentid != TC_H_ROOT) + } + if (!cl->tparent && parentid != TC_H_ROOT) { + NL_SET_ERR_MSG(extack, "Parent must be root"); return -EINVAL; + } } if (tb[TCA_CBQ_RATE]) { rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), - tb[TCA_CBQ_RTAB]); + tb[TCA_CBQ_RTAB], extack); if (rtab == NULL) return -EINVAL; } @@ -1494,6 +1508,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { + NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); qdisc_put_rtab(rtab); return err; } @@ -1532,19 +1547,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (parentid == TC_H_ROOT) return -EINVAL; - if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL || - tb[TCA_CBQ_LSSOPT] == NULL) + if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) { + NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing"); return -EINVAL; + } - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]); + rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB], + extack); if (rtab == NULL) return -EINVAL; if (classid) { err = -EINVAL; if (TC_H_MAJ(classid ^ sch->handle) || - cbq_class_lookup(q, classid)) + cbq_class_lookup(q, classid)) { + NL_SET_ERR_MSG(extack, "Specified class not found"); goto failure; + } } else { int i; classid = TC_H_MAKE(sch->handle, 0x8000); @@ -1556,8 +1575,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t break; } err = -ENOSR; - if (i >= 0x8000) + if (i >= 0x8000) { + NL_SET_ERR_MSG(extack, "Unable to generate classid"); goto failure; + } classid = classid|q->hgenerator; } @@ -1565,8 +1586,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (parentid) { parent = cbq_class_lookup(q, parentid); err = -EINVAL; - if (parent == NULL) + if (!parent) { + NL_SET_ERR_MSG(extack, "Failed to find parentid"); goto failure; + } } err = -ENOBUFS; @@ -1574,7 +1597,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; - err = tcf_block_get(&cl->block, &cl->filter_list, sch); + err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); return err; @@ -1586,6 +1609,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { + NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); tcf_block_put(cl->block); kfree(cl); goto failure; @@ -1594,7 +1618,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->R_tab = rtab; rtab = NULL; - cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); + cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, + NULL); if (!cl->q) cl->q = &noop_qdisc; else @@ -1678,7 +1703,8 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) return 0; } -static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg) +static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 7a72980c1509..cdd96b9a27bc 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -219,14 +219,17 @@ static void cbs_disable_offload(struct net_device *dev, } static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, - const struct tc_cbs_qopt *opt) + const struct tc_cbs_qopt *opt, + struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; struct tc_cbs_qopt_offload cbs = { }; int err; - if (!ops->ndo_setup_tc) + if (!ops->ndo_setup_tc) { + NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload"); return -EOPNOTSUPP; + } cbs.queue = q->queue; @@ -237,8 +240,10 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, cbs.sendslope = opt->sendslope; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); - if (err < 0) + if (err < 0) { + NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload"); return err; + } q->enqueue = cbs_enqueue_offload; q->dequeue = cbs_dequeue_offload; @@ -246,7 +251,8 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, return 0; } -static int cbs_change(struct Qdisc *sch, struct nlattr *opt) +static int cbs_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -254,12 +260,14 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt) struct tc_cbs_qopt *qopt; int err; - err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL); + err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack); if (err < 0) return err; - if (!tb[TCA_CBS_PARMS]) + if (!tb[TCA_CBS_PARMS]) { + NL_SET_ERR_MSG(extack, "Missing CBS parameter which are mandatory"); return -EINVAL; + } qopt = nla_data(tb[TCA_CBS_PARMS]); @@ -276,7 +284,7 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt) cbs_disable_offload(dev, q); } else { - err = cbs_enable_offload(dev, q, qopt); + err = cbs_enable_offload(dev, q, qopt, extack); if (err < 0) return err; } @@ -291,13 +299,16 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static int cbs_init(struct Qdisc *sch, struct nlattr *opt) +static int cbs_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - if (!opt) + if (!opt) { + NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory"); return -EINVAL; + } q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); @@ -306,7 +317,7 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); - return cbs_change(sch, opt); + return cbs_change(sch, opt, extack); } static void cbs_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 531250fceb9e..eafc0d17d174 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -344,7 +344,8 @@ static void choke_free(void *addr) kvfree(addr); } -static int choke_change(struct Qdisc *sch, struct nlattr *opt) +static int choke_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct choke_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CHOKE_MAX + 1]; @@ -431,9 +432,10 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static int choke_init(struct Qdisc *sch, struct nlattr *opt) +static int choke_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { - return choke_change(sch, opt); + return choke_change(sch, opt, extack); } static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index c518a1efcb9d..17cd81f84b5d 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -130,7 +130,8 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 }, }; -static int codel_change(struct Qdisc *sch, struct nlattr *opt) +static int codel_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CODEL_MAX + 1]; @@ -184,7 +185,8 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static int codel_init(struct Qdisc *sch, struct nlattr *opt) +static int codel_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct codel_sched_data *q = qdisc_priv(sch); @@ -196,7 +198,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt) q->params.mtu = psched_mtu(qdisc_dev(sch)); if (opt) { - int err = codel_change(sch, opt); + int err = codel_change(sch, opt, extack); if (err) return err; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 5bbcef3dcd8c..e0b0cf8a9939 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -64,7 +64,8 @@ static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { }; static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, + struct netlink_ext_ack *extack) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl = (struct drr_class *)*arg; @@ -73,17 +74,21 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, u32 quantum; int err; - if (!opt) + if (!opt) { + NL_SET_ERR_MSG(extack, "DRR options are required for this operation"); return -EINVAL; + } - err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, NULL); + err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, extack); if (err < 0) return err; if (tb[TCA_DRR_QUANTUM]) { quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]); - if (quantum == 0) + if (quantum == 0) { + NL_SET_ERR_MSG(extack, "Specified DRR quantum cannot be zero"); return -EINVAL; + } } else quantum = psched_mtu(qdisc_dev(sch)); @@ -94,8 +99,10 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, NULL, qdisc_root_sleeping_running(sch), tca[TCA_RATE]); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "Failed to replace estimator"); return err; + } } sch_tree_lock(sch); @@ -113,7 +120,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, classid); + &pfifo_qdisc_ops, classid, + NULL); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; else @@ -125,6 +133,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { + NL_SET_ERR_MSG(extack, "Failed to replace estimator"); qdisc_destroy(cl->qdisc); kfree(cl); return err; @@ -172,12 +181,15 @@ static unsigned long drr_search_class(struct Qdisc *sch, u32 classid) return (unsigned long)drr_find_class(sch, classid); } -static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct drr_sched *q = qdisc_priv(sch); - if (cl) + if (cl) { + NL_SET_ERR_MSG(extack, "DRR classid must be zero"); return NULL; + } return q->block; } @@ -201,13 +213,14 @@ static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg) } static int drr_graft_class(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old) + struct Qdisc *new, struct Qdisc **old, + struct netlink_ext_ack *extack) { struct drr_class *cl = (struct drr_class *)arg; if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, cl->common.classid); + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + cl->common.classid, NULL); if (new == NULL) new = &noop_qdisc; } @@ -408,12 +421,13 @@ out: return NULL; } -static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct drr_sched *q = qdisc_priv(sch); int err; - err = tcf_block_get(&q->block, &q->filter_list, sch); + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; err = qdisc_class_hash_init(&q->clhash); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index fb4fb71c68cf..049714c57075 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -61,7 +61,8 @@ static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) /* ------------------------- Class/flow operations ------------------------- */ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old) + struct Qdisc *new, struct Qdisc **old, + struct netlink_ext_ack *extack) { struct dsmark_qdisc_data *p = qdisc_priv(sch); @@ -70,7 +71,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, if (new == NULL) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle); + sch->handle, NULL); if (new == NULL) new = &noop_qdisc; } @@ -112,7 +113,8 @@ static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = { }; static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, + struct netlink_ext_ack *extack) { struct dsmark_qdisc_data *p = qdisc_priv(sch); struct nlattr *opt = tca[TCA_OPTIONS]; @@ -184,7 +186,8 @@ ignore: } } -static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct dsmark_qdisc_data *p = qdisc_priv(sch); @@ -330,7 +333,8 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch) return p->q->ops->peek(p->q); } -static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) +static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct dsmark_qdisc_data *p = qdisc_priv(sch); struct nlattr *tb[TCA_DSMARK_MAX + 1]; @@ -344,7 +348,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) goto errout; - err = tcf_block_get(&p->block, &p->filter_list, sch); + err = tcf_block_get(&p->block, &p->filter_list, sch, extack); if (err) return err; @@ -377,7 +381,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->default_index = default_index; p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); + p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, + NULL); if (p->q == NULL) p->q = &noop_qdisc; else diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 1e37247656f8..24893d3b5d22 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -55,7 +55,8 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_CN; } -static int fifo_init(struct Qdisc *sch, struct nlattr *opt) +static int fifo_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { bool bypass; bool is_bfifo = sch->ops == &bfifo_qdisc_ops; @@ -157,7 +158,7 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit) nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - ret = q->ops->change(q, nla); + ret = q->ops->change(q, nla, NULL); kfree(nla); } return ret; @@ -165,12 +166,14 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit) EXPORT_SYMBOL(fifo_set_limit); struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, - unsigned int limit) + unsigned int limit, + struct netlink_ext_ack *extack) { struct Qdisc *q; int err = -ENOMEM; - q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1)); + q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1), + extack); if (q) { err = fifo_set_limit(q, limit); if (err < 0) { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 263d16e3219e..a366e4c9413a 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -685,7 +685,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; -static int fq_change(struct Qdisc *sch, struct nlattr *opt) +static int fq_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct fq_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_MAX + 1]; @@ -788,7 +789,8 @@ static void fq_destroy(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); } -static int fq_init(struct Qdisc *sch, struct nlattr *opt) +static int fq_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct fq_sched_data *q = qdisc_priv(sch); int err; @@ -811,7 +813,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); if (opt) - err = fq_change(sch, opt); + err = fq_change(sch, opt, extack); else err = fq_resize(sch, q->fq_trees_log); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 0305d791ea94..22fa13cf5d8b 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -377,7 +377,8 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 }, }; -static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) +static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct fq_codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_CODEL_MAX + 1]; @@ -458,7 +459,8 @@ static void fq_codel_destroy(struct Qdisc *sch) kvfree(q->flows); } -static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) +static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct fq_codel_sched_data *q = qdisc_priv(sch); int i; @@ -477,12 +479,12 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) q->cparams.mtu = psched_mtu(qdisc_dev(sch)); if (opt) { - int err = fq_codel_change(sch, opt); + int err = fq_codel_change(sch, opt, extack); if (err) return err; } - err = tcf_block_get(&q->block, &q->filter_list, sch); + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; @@ -595,7 +597,8 @@ static void fq_codel_unbind(struct Qdisc *q, unsigned long cl) { } -static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct fq_codel_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cac003fddf3e..190570f21b20 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -26,11 +26,13 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/if_vlan.h> +#include <linux/skb_array.h> #include <linux/if_macvlan.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> #include <trace/events/qdisc.h> +#include <net/xfrm.h> /* Qdisc to use by default */ const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; @@ -47,17 +49,115 @@ EXPORT_SYMBOL(default_qdisc_ops); * - updates to tree and tree walking are only done under the rtnl mutex. */ -static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) +static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) +{ + const struct netdev_queue *txq = q->dev_queue; + spinlock_t *lock = NULL; + struct sk_buff *skb; + + if (q->flags & TCQ_F_NOLOCK) { + lock = qdisc_lock(q); + spin_lock(lock); + } + + skb = skb_peek(&q->skb_bad_txq); + if (skb) { + /* check the reason of requeuing without tx lock first */ + txq = skb_get_tx_queue(txq->dev, skb); + if (!netif_xmit_frozen_or_stopped(txq)) { + skb = __skb_dequeue(&q->skb_bad_txq); + if (qdisc_is_percpu_stats(q)) { + qdisc_qstats_cpu_backlog_dec(q, skb); + qdisc_qstats_cpu_qlen_dec(q); + } else { + qdisc_qstats_backlog_dec(q, skb); + q->q.qlen--; + } + } else { + skb = NULL; + } + } + + if (lock) + spin_unlock(lock); + + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q) { - q->gso_skb = skb; - q->qstats.requeues++; - qdisc_qstats_backlog_inc(q, skb); - q->q.qlen++; /* it's still part of the queue */ + struct sk_buff *skb = skb_peek(&q->skb_bad_txq); + + if (unlikely(skb)) + skb = __skb_dequeue_bad_txq(q); + + return skb; +} + +static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, + struct sk_buff *skb) +{ + spinlock_t *lock = NULL; + + if (q->flags & TCQ_F_NOLOCK) { + lock = qdisc_lock(q); + spin_lock(lock); + } + + __skb_queue_tail(&q->skb_bad_txq, skb); + + if (lock) + spin_unlock(lock); +} + +static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) +{ + while (skb) { + struct sk_buff *next = skb->next; + + __skb_queue_tail(&q->gso_skb, skb); + q->qstats.requeues++; + qdisc_qstats_backlog_inc(q, skb); + q->q.qlen++; /* it's still part of the queue */ + + skb = next; + } + __netif_schedule(q); + + return 0; +} + +static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q) +{ + spinlock_t *lock = qdisc_lock(q); + + spin_lock(lock); + while (skb) { + struct sk_buff *next = skb->next; + + __skb_queue_tail(&q->gso_skb, skb); + + qdisc_qstats_cpu_requeues_inc(q); + qdisc_qstats_cpu_backlog_inc(q, skb); + qdisc_qstats_cpu_qlen_inc(q); + + skb = next; + } + spin_unlock(lock); + __netif_schedule(q); return 0; } +static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) +{ + if (q->flags & TCQ_F_NOLOCK) + return dev_requeue_skb_locked(skb, q); + else + return __dev_requeue_skb(skb, q); +} + static void try_bulk_dequeue_skb(struct Qdisc *q, struct sk_buff *skb, const struct netdev_queue *txq, @@ -95,9 +195,15 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, if (!nskb) break; if (unlikely(skb_get_queue_mapping(nskb) != mapping)) { - q->skb_bad_txq = nskb; - qdisc_qstats_backlog_inc(q, nskb); - q->q.qlen++; + qdisc_enqueue_skb_bad_txq(q, nskb); + + if (qdisc_is_percpu_stats(q)) { + qdisc_qstats_cpu_backlog_inc(q, nskb); + qdisc_qstats_cpu_qlen_inc(q); + } else { + qdisc_qstats_backlog_inc(q, nskb); + q->q.qlen++; + } break; } skb->next = nskb; @@ -113,40 +219,62 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, int *packets) { - struct sk_buff *skb = q->gso_skb; const struct netdev_queue *txq = q->dev_queue; + struct sk_buff *skb = NULL; *packets = 1; - if (unlikely(skb)) { + if (unlikely(!skb_queue_empty(&q->gso_skb))) { + spinlock_t *lock = NULL; + + if (q->flags & TCQ_F_NOLOCK) { + lock = qdisc_lock(q); + spin_lock(lock); + } + + skb = skb_peek(&q->gso_skb); + + /* skb may be null if another cpu pulls gso_skb off in between + * empty check and lock. + */ + if (!skb) { + if (lock) + spin_unlock(lock); + goto validate; + } + /* skb in gso_skb were already validated */ *validate = false; + if (xfrm_offload(skb)) + *validate = true; /* check the reason of requeuing without tx lock first */ txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { - q->gso_skb = NULL; - qdisc_qstats_backlog_dec(q, skb); - q->q.qlen--; - } else + skb = __skb_dequeue(&q->gso_skb); + if (qdisc_is_percpu_stats(q)) { + qdisc_qstats_cpu_backlog_dec(q, skb); + qdisc_qstats_cpu_qlen_dec(q); + } else { + qdisc_qstats_backlog_dec(q, skb); + q->q.qlen--; + } + } else { skb = NULL; - goto trace; - } - *validate = true; - skb = q->skb_bad_txq; - if (unlikely(skb)) { - /* check the reason of requeuing without tx lock first */ - txq = skb_get_tx_queue(txq->dev, skb); - if (!netif_xmit_frozen_or_stopped(txq)) { - q->skb_bad_txq = NULL; - qdisc_qstats_backlog_dec(q, skb); - q->q.qlen--; - goto bulk; } - skb = NULL; + if (lock) + spin_unlock(lock); goto trace; } - if (!(q->flags & TCQ_F_ONETXQUEUE) || - !netif_xmit_frozen_or_stopped(txq)) - skb = q->dequeue(q); +validate: + *validate = true; + + if ((q->flags & TCQ_F_ONETXQUEUE) && + netif_xmit_frozen_or_stopped(txq)) + return skb; + + skb = qdisc_dequeue_skb_bad_txq(q); + if (unlikely(skb)) + goto bulk; + skb = q->dequeue(q); if (skb) { bulk: if (qdisc_may_bulk(q)) @@ -165,21 +293,33 @@ trace: * only one CPU can execute this function. * * Returns to the caller: - * 0 - queue is empty or throttled. - * >0 - queue is not empty. + * false - hardware queue frozen backoff + * true - feel free to send more pkts */ -int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, - struct net_device *dev, struct netdev_queue *txq, - spinlock_t *root_lock, bool validate) +bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, struct netdev_queue *txq, + spinlock_t *root_lock, bool validate) { int ret = NETDEV_TX_BUSY; + bool again = false; /* And release qdisc */ - spin_unlock(root_lock); + if (root_lock) + spin_unlock(root_lock); /* Note that we validate skb (GSO, checksum, ...) outside of locks */ if (validate) - skb = validate_xmit_skb_list(skb, dev); + skb = validate_xmit_skb_list(skb, dev, &again); + +#ifdef CONFIG_XFRM_OFFLOAD + if (unlikely(again)) { + if (root_lock) + spin_lock(root_lock); + + dev_requeue_skb(skb, q); + return false; + } +#endif if (likely(skb)) { HARD_TX_LOCK(dev, txq, smp_processor_id()); @@ -188,27 +328,28 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_UNLOCK(dev, txq); } else { - spin_lock(root_lock); - return qdisc_qlen(q); + if (root_lock) + spin_lock(root_lock); + return true; } - spin_lock(root_lock); - if (dev_xmit_complete(ret)) { - /* Driver sent out skb successfully or skb was consumed */ - ret = qdisc_qlen(q); - } else { + if (root_lock) + spin_lock(root_lock); + + if (!dev_xmit_complete(ret)) { /* Driver returned NETDEV_TX_BUSY - requeue skb */ if (unlikely(ret != NETDEV_TX_BUSY)) net_warn_ratelimited("BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); - ret = dev_requeue_skb(skb, q); + dev_requeue_skb(skb, q); + return false; } if (ret && netif_xmit_frozen_or_stopped(txq)) - ret = 0; + return false; - return ret; + return true; } /* @@ -230,20 +371,22 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct Qdisc *q, int *packets) +static inline bool qdisc_restart(struct Qdisc *q, int *packets) { + spinlock_t *root_lock = NULL; struct netdev_queue *txq; struct net_device *dev; - spinlock_t *root_lock; struct sk_buff *skb; bool validate; /* Dequeue packet */ skb = dequeue_skb(q, &validate, packets); if (unlikely(!skb)) - return 0; + return false; + + if (!(q->flags & TCQ_F_NOLOCK)) + root_lock = qdisc_lock(q); - root_lock = qdisc_lock(q); dev = qdisc_dev(q); txq = skb_get_tx_queue(dev, skb); @@ -267,8 +410,6 @@ void __qdisc_run(struct Qdisc *q) break; } } - - qdisc_run_end(q); } unsigned long dev_trans_start(struct net_device *dev) @@ -369,7 +510,7 @@ void netif_carrier_on(struct net_device *dev) if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; - atomic_inc(&dev->carrier_changes); + atomic_inc(&dev->carrier_up_count); linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -388,7 +529,7 @@ void netif_carrier_off(struct net_device *dev) if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; - atomic_inc(&dev->carrier_changes); + atomic_inc(&dev->carrier_down_count); linkwatch_fire_event(dev); } } @@ -437,7 +578,8 @@ struct Qdisc noop_qdisc = { }; EXPORT_SYMBOL(noop_qdisc); -static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt) +static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt, + struct netlink_ext_ack *extack) { /* register_qdisc() assigns a default of noop_enqueue if unset, * but __dev_queue_xmit() treats noqueue only as such @@ -468,93 +610,99 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = { /* * Private data for a pfifo_fast scheduler containing: - * - queues for the three band - * - bitmap indicating which of the bands contain skbs + * - rings for priority bands */ struct pfifo_fast_priv { - u32 bitmap; - struct qdisc_skb_head q[PFIFO_FAST_BANDS]; + struct skb_array q[PFIFO_FAST_BANDS]; }; -/* - * Convert a bitmap to the first band number where an skb is queued, where: - * bitmap=0 means there are no skbs on any band. - * bitmap=1 means there is an skb on band 0. - * bitmap=7 means there are skbs on all 3 bands, etc. - */ -static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0}; - -static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv, - int band) +static inline struct skb_array *band2list(struct pfifo_fast_priv *priv, + int band) { - return priv->q + band; + return &priv->q[band]; } static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { - if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { - int band = prio2band[skb->priority & TC_PRIO_MAX]; - struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - struct qdisc_skb_head *list = band2list(priv, band); - - priv->bitmap |= (1 << band); - qdisc->q.qlen++; - return __qdisc_enqueue_tail(skb, qdisc, list); - } + int band = prio2band[skb->priority & TC_PRIO_MAX]; + struct pfifo_fast_priv *priv = qdisc_priv(qdisc); + struct skb_array *q = band2list(priv, band); + int err; + + err = skb_array_produce(q, skb); - return qdisc_drop(skb, qdisc, to_free); + if (unlikely(err)) + return qdisc_drop_cpu(skb, qdisc, to_free); + + qdisc_qstats_cpu_qlen_inc(qdisc); + qdisc_qstats_cpu_backlog_inc(qdisc, skb); + return NET_XMIT_SUCCESS; } static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) { struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - int band = bitmap2band[priv->bitmap]; - - if (likely(band >= 0)) { - struct qdisc_skb_head *qh = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(qh); + struct sk_buff *skb = NULL; + int band; - if (likely(skb != NULL)) { - qdisc_qstats_backlog_dec(qdisc, skb); - qdisc_bstats_update(qdisc, skb); - } + for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { + struct skb_array *q = band2list(priv, band); - qdisc->q.qlen--; - if (qh->qlen == 0) - priv->bitmap &= ~(1 << band); + if (__skb_array_empty(q)) + continue; - return skb; + skb = skb_array_consume_bh(q); + } + if (likely(skb)) { + qdisc_qstats_cpu_backlog_dec(qdisc, skb); + qdisc_bstats_cpu_update(qdisc, skb); + qdisc_qstats_cpu_qlen_dec(qdisc); } - return NULL; + return skb; } static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) { struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - int band = bitmap2band[priv->bitmap]; + struct sk_buff *skb = NULL; + int band; - if (band >= 0) { - struct qdisc_skb_head *qh = band2list(priv, band); + for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { + struct skb_array *q = band2list(priv, band); - return qh->head; + skb = __skb_array_peek(q); } - return NULL; + return skb; } static void pfifo_fast_reset(struct Qdisc *qdisc) { - int prio; + int i, band; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __qdisc_reset_queue(band2list(priv, prio)); + for (band = 0; band < PFIFO_FAST_BANDS; band++) { + struct skb_array *q = band2list(priv, band); + struct sk_buff *skb; - priv->bitmap = 0; - qdisc->qstats.backlog = 0; - qdisc->q.qlen = 0; + /* NULL ring is possible if destroy path is due to a failed + * skb_array_init() in pfifo_fast_init() case. + */ + if (!q->ring.queue) + continue; + + while ((skb = skb_array_consume_bh(q)) != NULL) + kfree_skb(skb); + } + + for_each_possible_cpu(i) { + struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i); + + q->backlog = 0; + q->qlen = 0; + } } static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) @@ -570,19 +718,68 @@ nla_put_failure: return -1; } -static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) +static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt, + struct netlink_ext_ack *extack) { - int prio; + unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); + int prio; + + /* guard against zero length rings */ + if (!qlen) + return -EINVAL; - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - qdisc_skb_head_init(band2list(priv, prio)); + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { + struct skb_array *q = band2list(priv, prio); + int err; + + err = skb_array_init(q, qlen, GFP_KERNEL); + if (err) + return -ENOMEM; + } /* Can by-pass the queue discipline */ qdisc->flags |= TCQ_F_CAN_BYPASS; return 0; } +static void pfifo_fast_destroy(struct Qdisc *sch) +{ + struct pfifo_fast_priv *priv = qdisc_priv(sch); + int prio; + + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { + struct skb_array *q = band2list(priv, prio); + + /* NULL ring is possible if destroy path is due to a failed + * skb_array_init() in pfifo_fast_init() case. + */ + if (!q->ring.queue) + continue; + /* Destroy ring but no need to kfree_skb because a call to + * pfifo_fast_reset() has already done that work. + */ + ptr_ring_cleanup(&q->ring, NULL); + } +} + +static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch, + unsigned int new_len) +{ + struct pfifo_fast_priv *priv = qdisc_priv(sch); + struct skb_array *bands[PFIFO_FAST_BANDS]; + int prio; + + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { + struct skb_array *q = band2list(priv, prio); + + bands[prio] = q; + } + + return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len, + GFP_KERNEL); +} + struct Qdisc_ops pfifo_fast_ops __read_mostly = { .id = "pfifo_fast", .priv_size = sizeof(struct pfifo_fast_priv), @@ -590,9 +787,12 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { .dequeue = pfifo_fast_dequeue, .peek = pfifo_fast_peek, .init = pfifo_fast_init, + .destroy = pfifo_fast_destroy, .reset = pfifo_fast_reset, .dump = pfifo_fast_dump, + .change_tx_queue_len = pfifo_fast_change_tx_queue_len, .owner = THIS_MODULE, + .static_flags = TCQ_F_NOLOCK | TCQ_F_CPUSTATS, }; EXPORT_SYMBOL(pfifo_fast_ops); @@ -600,7 +800,8 @@ static struct lock_class_key qdisc_tx_busylock; static struct lock_class_key qdisc_running_key; struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, - const struct Qdisc_ops *ops) + const struct Qdisc_ops *ops, + struct netlink_ext_ack *extack) { void *p; struct Qdisc *sch; @@ -609,6 +810,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct net_device *dev; if (!dev_queue) { + NL_SET_ERR_MSG(extack, "No device queue given"); err = -EINVAL; goto errout; } @@ -630,6 +832,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } + __skb_queue_head_init(&sch->gso_skb); + __skb_queue_head_init(&sch->skb_bad_txq); qdisc_skb_head_init(&sch->q); spin_lock_init(&sch->q.lock); @@ -671,21 +875,24 @@ errout: struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, - unsigned int parentid) + unsigned int parentid, + struct netlink_ext_ack *extack) { struct Qdisc *sch; - if (!try_module_get(ops->owner)) + if (!try_module_get(ops->owner)) { + NL_SET_ERR_MSG(extack, "Failed to increase module reference counter"); return NULL; + } - sch = qdisc_alloc(dev_queue, ops); + sch = qdisc_alloc(dev_queue, ops, extack); if (IS_ERR(sch)) { module_put(ops->owner); return NULL; } sch->parent = parentid; - if (!ops->init || ops->init(sch, NULL) == 0) + if (!ops->init || ops->init(sch, NULL, extack) == 0) return sch; qdisc_destroy(sch); @@ -698,17 +905,21 @@ EXPORT_SYMBOL(qdisc_create_dflt); void qdisc_reset(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; + struct sk_buff *skb, *tmp; if (ops->reset) ops->reset(qdisc); - kfree_skb(qdisc->skb_bad_txq); - qdisc->skb_bad_txq = NULL; + skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) { + __skb_unlink(skb, &qdisc->gso_skb); + kfree_skb_list(skb); + } - if (qdisc->gso_skb) { - kfree_skb_list(qdisc->gso_skb); - qdisc->gso_skb = NULL; + skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) { + __skb_unlink(skb, &qdisc->skb_bad_txq); + kfree_skb_list(skb); } + qdisc->q.qlen = 0; qdisc->qstats.backlog = 0; } @@ -727,6 +938,7 @@ void qdisc_free(struct Qdisc *qdisc) void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; + struct sk_buff *skb, *tmp; if (qdisc->flags & TCQ_F_BUILTIN || !refcount_dec_and_test(&qdisc->refcnt)) @@ -746,8 +958,16 @@ void qdisc_destroy(struct Qdisc *qdisc) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); - kfree_skb_list(qdisc->gso_skb); - kfree_skb(qdisc->skb_bad_txq); + skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) { + __skb_unlink(skb, &qdisc->gso_skb); + kfree_skb_list(skb); + } + + skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) { + __skb_unlink(skb, &qdisc->skb_bad_txq); + kfree_skb_list(skb); + } + qdisc_free(qdisc); } EXPORT_SYMBOL(qdisc_destroy); @@ -762,10 +982,6 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, root_lock = qdisc_lock(oqdisc); spin_lock_bh(root_lock); - /* Prune old scheduler */ - if (oqdisc && refcount_read(&oqdisc->refcnt) <= 1) - qdisc_reset(oqdisc); - /* ... and graft new one */ if (qdisc == NULL) qdisc = &noop_qdisc; @@ -788,7 +1004,7 @@ static void attach_one_default_qdisc(struct net_device *dev, if (dev->priv_flags & IFF_NO_QUEUE) ops = &noqueue_qdisc_ops; - qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT); + qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL); if (!qdisc) { netdev_info(dev, "activation failed\n"); return; @@ -811,7 +1027,7 @@ static void attach_default_qdiscs(struct net_device *dev) dev->qdisc = txq->qdisc_sleeping; qdisc_refcount_inc(dev->qdisc); } else { - qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); + qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL); if (qdisc) { dev->qdisc = qdisc; qdisc->ops->attach(qdisc); @@ -901,14 +1117,18 @@ static bool some_qdisc_is_busy(struct net_device *dev) dev_queue = netdev_get_tx_queue(dev, i); q = dev_queue->qdisc_sleeping; - root_lock = qdisc_lock(q); - spin_lock_bh(root_lock); + if (q->flags & TCQ_F_NOLOCK) { + val = test_bit(__QDISC_STATE_SCHED, &q->state); + } else { + root_lock = qdisc_lock(q); + spin_lock_bh(root_lock); - val = (qdisc_is_running(q) || - test_bit(__QDISC_STATE_SCHED, &q->state)); + val = (qdisc_is_running(q) || + test_bit(__QDISC_STATE_SCHED, &q->state)); - spin_unlock_bh(root_lock); + spin_unlock_bh(root_lock); + } if (val) return true; @@ -916,6 +1136,16 @@ static bool some_qdisc_is_busy(struct net_device *dev) return false; } +static void dev_qdisc_reset(struct net_device *dev, + struct netdev_queue *dev_queue, + void *none) +{ + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + + if (qdisc) + qdisc_reset(qdisc); +} + /** * dev_deactivate_many - deactivate transmissions on several devices * @head: list of devices to deactivate @@ -926,7 +1156,6 @@ static bool some_qdisc_is_busy(struct net_device *dev) void dev_deactivate_many(struct list_head *head) { struct net_device *dev; - bool sync_needed = false; list_for_each_entry(dev, head, close_list) { netdev_for_each_tx_queue(dev, dev_deactivate_queue, @@ -936,20 +1165,25 @@ void dev_deactivate_many(struct list_head *head) &noop_qdisc); dev_watchdog_down(dev); - sync_needed |= !dev->dismantle; } /* Wait for outstanding qdisc-less dev_queue_xmit calls. * This is avoided if all devices are in dismantle phase : * Caller will call synchronize_net() for us */ - if (sync_needed) - synchronize_net(); + synchronize_net(); /* Wait for outstanding qdisc_run calls. */ - list_for_each_entry(dev, head, close_list) + list_for_each_entry(dev, head, close_list) { while (some_qdisc_is_busy(dev)) yield(); + /* The new qdisc is assigned at this point so we can safely + * unwind stale skb lists and qdisc statistics + */ + netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL); + if (dev_ingress_queue(dev)) + dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL); + } } void dev_deactivate(struct net_device *dev) @@ -962,6 +1196,39 @@ void dev_deactivate(struct net_device *dev) } EXPORT_SYMBOL(dev_deactivate); +static int qdisc_change_tx_queue_len(struct net_device *dev, + struct netdev_queue *dev_queue) +{ + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + const struct Qdisc_ops *ops = qdisc->ops; + + if (ops->change_tx_queue_len) + return ops->change_tx_queue_len(qdisc, dev->tx_queue_len); + return 0; +} + +int dev_qdisc_change_tx_queue_len(struct net_device *dev) +{ + bool up = dev->flags & IFF_UP; + unsigned int i; + int ret = 0; + + if (up) + dev_deactivate(dev); + + for (i = 0; i < dev->num_tx_queues; i++) { + ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]); + + /* TODO: revert changes on a partial failure */ + if (ret) + break; + } + + if (up) + dev_activate(dev); + return ret; +} + static void dev_init_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc) @@ -970,6 +1237,8 @@ static void dev_init_scheduler_queue(struct net_device *dev, rcu_assign_pointer(dev_queue->qdisc, qdisc); dev_queue->qdisc_sleeping = qdisc; + __skb_queue_head_init(&qdisc->gso_skb); + __skb_queue_head_init(&qdisc->skb_bad_txq); } void dev_init_scheduler(struct net_device *dev) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index bc30f9186ac6..cbe4831f46f4 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -306,12 +306,13 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) struct tc_gred_sopt *sopt; int i; - if (dps == NULL) + if (!dps) return -EINVAL; sopt = nla_data(dps); - if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs) + if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || + sopt->def_DP >= sopt->DPs) return -EINVAL; sch_tree_lock(sch); @@ -391,7 +392,8 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_LIMIT] = { .type = NLA_U32 }, }; -static int gred_change(struct Qdisc *sch, struct nlattr *opt) +static int gred_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_qopt *ctl; @@ -465,12 +467,13 @@ errout: return err; } -static int gred_init(struct Qdisc *sch, struct nlattr *opt) +static int gred_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_GRED_MAX + 1]; int err; - if (opt == NULL) + if (!opt) return -EINVAL; err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d04068a97d81..3ae9877ea205 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -921,7 +921,8 @@ static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = { static int hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, + struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)*arg; @@ -1033,7 +1034,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - err = tcf_block_get(&cl->block, &cl->filter_list, sch); + err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); return err; @@ -1061,8 +1062,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cl_common.classid = classid; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, classid); + cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + classid, NULL); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; else @@ -1176,7 +1177,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) static int hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct hfsc_class *cl = (struct hfsc_class *)arg; @@ -1184,7 +1185,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, return -EINVAL; if (new == NULL) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->cl_common.classid); + cl->cl_common.classid, NULL); if (new == NULL) new = &noop_qdisc; } @@ -1246,7 +1247,8 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) cl->filter_cnt--; } -static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg) +static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)arg; @@ -1388,7 +1390,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch) } static int -hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct tc_hfsc_qopt *qopt; @@ -1396,7 +1399,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); - if (opt == NULL || nla_len(opt) < sizeof(*qopt)) + if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); @@ -1406,14 +1409,14 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; - err = tcf_block_get(&q->root.block, &q->root.filter_list, sch); + err = tcf_block_get(&q->root.block, &q->root.filter_list, sch, extack); if (err) return err; q->root.cl_common.classid = sch->handle; q->root.sched = q; q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle); + sch->handle, NULL); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; else @@ -1429,7 +1432,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) } static int -hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt) +hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct tc_hfsc_qopt *qopt; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 73a53c08091b..bce2632212d3 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -504,7 +504,8 @@ static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { [TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 }, }; -static int hhf_change(struct Qdisc *sch, struct nlattr *opt) +static int hhf_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_HHF_MAX + 1]; @@ -571,7 +572,8 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static int hhf_init(struct Qdisc *sch, struct nlattr *opt) +static int hhf_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct hhf_sched_data *q = qdisc_priv(sch); int i; @@ -589,7 +591,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) q->hhf_non_hh_weight = 2; if (opt) { - int err = hhf_change(sch, opt); + int err = hhf_change(sch, opt, extack); if (err) return err; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index fa0380730ff0..1ea9846cc6ce 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1017,7 +1017,8 @@ static void htb_work_func(struct work_struct *work) rcu_read_unlock(); } -static int htb_init(struct Qdisc *sch, struct nlattr *opt) +static int htb_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct htb_sched *q = qdisc_priv(sch); struct nlattr *tb[TCA_HTB_MAX + 1]; @@ -1031,7 +1032,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list, sch); + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; @@ -1171,7 +1172,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) } static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct htb_class *cl = (struct htb_class *)arg; @@ -1179,7 +1180,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, return -EINVAL; if (new == NULL && (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid)) == NULL) + cl->common.classid, extack)) == NULL) return -ENOBUFS; *old = qdisc_replace(sch, new, &cl->un.leaf.q); @@ -1289,7 +1290,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (!cl->level && htb_parent_last_child(cl)) { new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->parent->common.classid); + cl->parent->common.classid, + NULL); last_child = 1; } @@ -1326,7 +1328,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) static int htb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, struct netlink_ext_ack *extack) { int err = -EINVAL; struct htb_sched *q = qdisc_priv(sch); @@ -1356,10 +1358,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* Keeping backward compatible with rate_table based iproute2 tc */ if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) - qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB])); + qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB], + NULL)); if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) - qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB])); + qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB], + NULL)); if (!cl) { /* new class */ struct Qdisc *new_q; @@ -1394,7 +1398,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; - err = tcf_block_get(&cl->block, &cl->filter_list, sch); + err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); goto failure; @@ -1423,8 +1427,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, * so that can't be used inside of sch_tree_lock * -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, classid); + new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + classid, NULL); sch_tree_lock(sch); if (parent && !parent->level) { unsigned int qlen = parent->un.leaf.q->q.qlen; @@ -1524,7 +1528,8 @@ failure: return err; } -static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg) +static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 003e1b063447..ce3f55259d0d 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -48,7 +48,8 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) { } -static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct ingress_sched_data *q = qdisc_priv(sch); @@ -60,9 +61,24 @@ static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv) struct mini_Qdisc_pair *miniqp = priv; mini_qdisc_pair_swap(miniqp, tp_head); +}; + +static void ingress_ingress_block_set(struct Qdisc *sch, u32 block_index) +{ + struct ingress_sched_data *q = qdisc_priv(sch); + + q->block_info.block_index = block_index; } -static int ingress_init(struct Qdisc *sch, struct nlattr *opt) +static u32 ingress_ingress_block_get(struct Qdisc *sch) +{ + struct ingress_sched_data *q = qdisc_priv(sch); + + return q->block_info.block_index; +} + +static int ingress_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -75,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) q->block_info.chain_head_change = clsact_chain_head_change; q->block_info.chain_head_change_priv = &q->miniqp; - return tcf_block_get_ext(&q->block, sch, &q->block_info); + return tcf_block_get_ext(&q->block, sch, &q->block_info, extack); } static void ingress_destroy(struct Qdisc *sch) @@ -111,14 +127,16 @@ static const struct Qdisc_class_ops ingress_class_ops = { }; static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { - .cl_ops = &ingress_class_ops, - .id = "ingress", - .priv_size = sizeof(struct ingress_sched_data), - .static_flags = TCQ_F_CPUSTATS, - .init = ingress_init, - .destroy = ingress_destroy, - .dump = ingress_dump, - .owner = THIS_MODULE, + .cl_ops = &ingress_class_ops, + .id = "ingress", + .priv_size = sizeof(struct ingress_sched_data), + .static_flags = TCQ_F_CPUSTATS, + .init = ingress_init, + .destroy = ingress_destroy, + .dump = ingress_dump, + .ingress_block_set = ingress_ingress_block_set, + .ingress_block_get = ingress_ingress_block_get, + .owner = THIS_MODULE, }; struct clsact_sched_data { @@ -147,7 +165,8 @@ static unsigned long clsact_bind_filter(struct Qdisc *sch, return clsact_find(sch, classid); } -static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct clsact_sched_data *q = qdisc_priv(sch); @@ -161,7 +180,36 @@ static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl) } } -static int clsact_init(struct Qdisc *sch, struct nlattr *opt) +static void clsact_ingress_block_set(struct Qdisc *sch, u32 block_index) +{ + struct clsact_sched_data *q = qdisc_priv(sch); + + q->ingress_block_info.block_index = block_index; +} + +static void clsact_egress_block_set(struct Qdisc *sch, u32 block_index) +{ + struct clsact_sched_data *q = qdisc_priv(sch); + + q->egress_block_info.block_index = block_index; +} + +static u32 clsact_ingress_block_get(struct Qdisc *sch) +{ + struct clsact_sched_data *q = qdisc_priv(sch); + + return q->ingress_block_info.block_index; +} + +static u32 clsact_egress_block_get(struct Qdisc *sch) +{ + struct clsact_sched_data *q = qdisc_priv(sch); + + return q->egress_block_info.block_index; +} + +static int clsact_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct clsact_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -176,7 +224,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) q->ingress_block_info.chain_head_change = clsact_chain_head_change; q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress; - err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info); + err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info, + extack); if (err) return err; @@ -186,7 +235,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) q->egress_block_info.chain_head_change = clsact_chain_head_change; q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; - return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); + return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info, extack); } static void clsact_destroy(struct Qdisc *sch) @@ -210,14 +259,18 @@ static const struct Qdisc_class_ops clsact_class_ops = { }; static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { - .cl_ops = &clsact_class_ops, - .id = "clsact", - .priv_size = sizeof(struct clsact_sched_data), - .static_flags = TCQ_F_CPUSTATS, - .init = clsact_init, - .destroy = clsact_destroy, - .dump = ingress_dump, - .owner = THIS_MODULE, + .cl_ops = &clsact_class_ops, + .id = "clsact", + .priv_size = sizeof(struct clsact_sched_data), + .static_flags = TCQ_F_CPUSTATS, + .init = clsact_init, + .destroy = clsact_destroy, + .dump = ingress_dump, + .ingress_block_set = clsact_ingress_block_set, + .egress_block_set = clsact_egress_block_set, + .ingress_block_get = clsact_ingress_block_get, + .egress_block_get = clsact_egress_block_get, + .owner = THIS_MODULE, }; static int __init ingress_module_init(void) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 213b586a06a0..f062a18e9162 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -17,6 +17,7 @@ #include <linux/skbuff.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/sch_generic.h> struct mq_sched { struct Qdisc **qdiscs; @@ -35,7 +36,8 @@ static void mq_destroy(struct Qdisc *sch) kfree(priv->qdiscs); } -static int mq_init(struct Qdisc *sch, struct nlattr *opt) +static int mq_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); @@ -59,7 +61,8 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) dev_queue = netdev_get_tx_queue(dev, ntx); qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx), TC_H_MAKE(TC_H_MAJ(sch->handle), - TC_H_MIN(ntx + 1))); + TC_H_MIN(ntx + 1)), + extack); if (!qdisc) return -ENOMEM; priv->qdiscs[ntx] = qdisc; @@ -97,23 +100,42 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) struct net_device *dev = qdisc_dev(sch); struct Qdisc *qdisc; unsigned int ntx; + __u32 qlen = 0; sch->q.qlen = 0; memset(&sch->bstats, 0, sizeof(sch->bstats)); memset(&sch->qstats, 0, sizeof(sch->qstats)); + /* MQ supports lockless qdiscs. However, statistics accounting needs + * to account for all, none, or a mix of locked and unlocked child + * qdiscs. Percpu stats are added to counters in-band and locking + * qdisc totals are added at end. + */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; spin_lock_bh(qdisc_lock(qdisc)); - sch->q.qlen += qdisc->q.qlen; - sch->bstats.bytes += qdisc->bstats.bytes; - sch->bstats.packets += qdisc->bstats.packets; - sch->qstats.backlog += qdisc->qstats.backlog; - sch->qstats.drops += qdisc->qstats.drops; - sch->qstats.requeues += qdisc->qstats.requeues; - sch->qstats.overlimits += qdisc->qstats.overlimits; + + if (qdisc_is_percpu_stats(qdisc)) { + qlen = qdisc_qlen_sum(qdisc); + __gnet_stats_copy_basic(NULL, &sch->bstats, + qdisc->cpu_bstats, + &qdisc->bstats); + __gnet_stats_copy_queue(&sch->qstats, + qdisc->cpu_qstats, + &qdisc->qstats, qlen); + } else { + sch->q.qlen += qdisc->q.qlen; + sch->bstats.bytes += qdisc->bstats.bytes; + sch->bstats.packets += qdisc->bstats.packets; + sch->qstats.backlog += qdisc->qstats.backlog; + sch->qstats.drops += qdisc->qstats.drops; + sch->qstats.requeues += qdisc->qstats.requeues; + sch->qstats.overlimits += qdisc->qstats.overlimits; + } + spin_unlock_bh(qdisc_lock(qdisc)); } + return 0; } @@ -134,7 +156,7 @@ static struct netdev_queue *mq_select_queue(struct Qdisc *sch, } static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); struct net_device *dev = qdisc_dev(sch); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b85885a9d8a1..0e9d761cdd80 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -132,7 +132,8 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, return 0; } -static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) +static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); @@ -229,7 +230,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, i), TC_H_MAKE(TC_H_MAJ(sch->handle), - TC_H_MIN(i + 1))); + TC_H_MIN(i + 1)), extack); if (!qdisc) return -ENOMEM; @@ -319,7 +320,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, } static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); @@ -388,22 +389,40 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb); struct tc_mqprio_qopt opt = { 0 }; struct Qdisc *qdisc; - unsigned int i; + unsigned int ntx, tc; sch->q.qlen = 0; memset(&sch->bstats, 0, sizeof(sch->bstats)); memset(&sch->qstats, 0, sizeof(sch->qstats)); - for (i = 0; i < dev->num_tx_queues; i++) { - qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); + /* MQ supports lockless qdiscs. However, statistics accounting needs + * to account for all, none, or a mix of locked and unlocked child + * qdiscs. Percpu stats are added to counters in-band and locking + * qdisc totals are added at end. + */ + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; spin_lock_bh(qdisc_lock(qdisc)); - sch->q.qlen += qdisc->q.qlen; - sch->bstats.bytes += qdisc->bstats.bytes; - sch->bstats.packets += qdisc->bstats.packets; - sch->qstats.backlog += qdisc->qstats.backlog; - sch->qstats.drops += qdisc->qstats.drops; - sch->qstats.requeues += qdisc->qstats.requeues; - sch->qstats.overlimits += qdisc->qstats.overlimits; + + if (qdisc_is_percpu_stats(qdisc)) { + __u32 qlen = qdisc_qlen_sum(qdisc); + + __gnet_stats_copy_basic(NULL, &sch->bstats, + qdisc->cpu_bstats, + &qdisc->bstats); + __gnet_stats_copy_queue(&sch->qstats, + qdisc->cpu_qstats, + &qdisc->qstats, qlen); + } else { + sch->q.qlen += qdisc->q.qlen; + sch->bstats.bytes += qdisc->bstats.bytes; + sch->bstats.packets += qdisc->bstats.packets; + sch->qstats.backlog += qdisc->qstats.backlog; + sch->qstats.drops += qdisc->qstats.drops; + sch->qstats.requeues += qdisc->qstats.requeues; + sch->qstats.overlimits += qdisc->qstats.overlimits; + } + spin_unlock_bh(qdisc_lock(qdisc)); } @@ -411,9 +430,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); opt.hw = priv->hw_offload; - for (i = 0; i < netdev_get_num_tc(dev); i++) { - opt.count[i] = dev->tc_to_txq[i].count; - opt.offset[i] = dev->tc_to_txq[i].offset; + for (tc = 0; tc < netdev_get_num_tc(dev); tc++) { + opt.count[tc] = dev->tc_to_txq[tc].count; + opt.offset[tc] = dev->tc_to_txq[tc].offset; } if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt)) @@ -495,7 +514,6 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, if (cl >= TC_H_MIN_PRIORITY) { int i; __u32 qlen = 0; - struct Qdisc *qdisc; struct gnet_stats_queue qstats = {0}; struct gnet_stats_basic_packed bstats = {0}; struct net_device *dev = qdisc_dev(sch); @@ -511,18 +529,26 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, for (i = tc.offset; i < tc.offset + tc.count; i++) { struct netdev_queue *q = netdev_get_tx_queue(dev, i); + struct Qdisc *qdisc = rtnl_dereference(q->qdisc); + struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; + struct gnet_stats_queue __percpu *cpu_qstats = NULL; - qdisc = rtnl_dereference(q->qdisc); spin_lock_bh(qdisc_lock(qdisc)); - qlen += qdisc->q.qlen; - bstats.bytes += qdisc->bstats.bytes; - bstats.packets += qdisc->bstats.packets; - qstats.backlog += qdisc->qstats.backlog; - qstats.drops += qdisc->qstats.drops; - qstats.requeues += qdisc->qstats.requeues; - qstats.overlimits += qdisc->qstats.overlimits; + if (qdisc_is_percpu_stats(qdisc)) { + cpu_bstats = qdisc->cpu_bstats; + cpu_qstats = qdisc->cpu_qstats; + } + + qlen = qdisc_qlen_sum(qdisc); + __gnet_stats_copy_basic(NULL, &sch->bstats, + cpu_bstats, &qdisc->bstats); + __gnet_stats_copy_queue(&sch->qstats, + cpu_qstats, + &qdisc->qstats, + qlen); spin_unlock_bh(qdisc_lock(qdisc)); } + /* Reclaim root sleeping lock before completing stats */ if (d->lock) spin_lock_bh(d->lock); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 012216386c0b..1da7ea8de0ad 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -180,7 +180,8 @@ multiq_destroy(struct Qdisc *sch) kfree(q->queues); } -static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) +static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct multiq_sched_data *q = qdisc_priv(sch); struct tc_multiq_qopt *qopt; @@ -215,7 +216,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, - i + 1)); + i + 1), extack); if (child) { sch_tree_lock(sch); old = q->queues[i]; @@ -236,17 +237,18 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) return 0; } -static int multiq_init(struct Qdisc *sch, struct nlattr *opt) +static int multiq_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct multiq_sched_data *q = qdisc_priv(sch); int i, err; q->queues = NULL; - if (opt == NULL) + if (!opt) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list, sch); + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; @@ -258,7 +260,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->max_bands; i++) q->queues[i] = &noop_qdisc; - return multiq_tune(sch, opt); + return multiq_tune(sch, opt, extack); } static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -281,7 +283,7 @@ nla_put_failure: } static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct multiq_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; @@ -369,7 +371,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } -static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct multiq_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index dd70924cbcdf..7bbc13b8ca47 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -893,7 +893,8 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, } /* Parse netlink message to set options */ -static int netem_change(struct Qdisc *sch, struct nlattr *opt) +static int netem_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct netem_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_NETEM_MAX + 1]; @@ -984,7 +985,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) return ret; } -static int netem_init(struct Qdisc *sch, struct nlattr *opt) +static int netem_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct netem_sched_data *q = qdisc_priv(sch); int ret; @@ -995,7 +997,7 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; q->loss_model = CLG_RANDOM; - ret = netem_change(sch, opt); + ret = netem_change(sch, opt, extack); if (ret) pr_info("netem: change failed\n"); return ret; @@ -1157,7 +1159,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl, } static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct netem_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 776c694c77c7..18d30bb86881 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -181,7 +181,8 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { [TCA_PIE_BYTEMODE] = {.type = NLA_U32}, }; -static int pie_change(struct Qdisc *sch, struct nlattr *opt) +static int pie_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct pie_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_PIE_MAX + 1]; @@ -439,7 +440,8 @@ static void pie_timer(struct timer_list *t) } -static int pie_init(struct Qdisc *sch, struct nlattr *opt) +static int pie_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct pie_sched_data *q = qdisc_priv(sch); @@ -451,7 +453,7 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) timer_setup(&q->adapt_timer, pie_timer, 0); if (opt) { - int err = pie_change(sch, opt); + int err = pie_change(sch, opt, extack); if (err) return err; diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 1c6cbab3e7b9..5619d2eb17b6 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -123,7 +123,8 @@ static struct sk_buff *plug_dequeue(struct Qdisc *sch) return qdisc_dequeue_head(sch); } -static int plug_init(struct Qdisc *sch, struct nlattr *opt) +static int plug_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct plug_sched_data *q = qdisc_priv(sch); @@ -158,7 +159,8 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt) * command is received (just act as a pass-thru queue). * TCQ_PLUG_LIMIT: Increase/decrease queue size */ -static int plug_change(struct Qdisc *sch, struct nlattr *opt) +static int plug_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct plug_sched_data *q = qdisc_priv(sch); struct tc_plug_qopt *msg; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2c79559a0d31..efbf51f35778 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -142,6 +142,31 @@ prio_reset(struct Qdisc *sch) sch->q.qlen = 0; } +static int prio_offload(struct Qdisc *sch, bool enable) +{ + struct prio_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_prio_qopt_offload opt = { + .handle = sch->handle, + .parent = sch->parent, + }; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + if (enable) { + opt.command = TC_PRIO_REPLACE; + opt.replace_params.bands = q->bands; + memcpy(&opt.replace_params.priomap, q->prio2band, + TC_PRIO_MAX + 1); + opt.replace_params.qstats = &sch->qstats; + } else { + opt.command = TC_PRIO_DESTROY; + } + + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt); +} + static void prio_destroy(struct Qdisc *sch) { @@ -149,11 +174,13 @@ prio_destroy(struct Qdisc *sch) struct prio_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); + prio_offload(sch, false); for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } -static int prio_tune(struct Qdisc *sch, struct nlattr *opt) +static int prio_tune(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct prio_sched_data *q = qdisc_priv(sch); struct Qdisc *queues[TCQ_PRIO_BANDS]; @@ -175,7 +202,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) /* Before commit, make sure we can allocate all new qdiscs */ for (i = oldbands; i < qopt->bands; i++) { queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - TC_H_MAKE(sch->handle, i + 1)); + TC_H_MAKE(sch->handle, i + 1), + extack); if (!queues[i]) { while (i > oldbands) qdisc_destroy(queues[--i]); @@ -202,10 +230,12 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) } sch_tree_unlock(sch); + prio_offload(sch, true); return 0; } -static int prio_init(struct Qdisc *sch, struct nlattr *opt) +static int prio_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct prio_sched_data *q = qdisc_priv(sch); int err; @@ -213,11 +243,42 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list, sch); + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; - return prio_tune(sch, opt); + return prio_tune(sch, opt, extack); +} + +static int prio_dump_offload(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_prio_qopt_offload hw_stats = { + .command = TC_PRIO_STATS, + .handle = sch->handle, + .parent = sch->parent, + { + .stats = { + .bstats = &sch->bstats, + .qstats = &sch->qstats, + }, + }, + }; + int err; + + sch->flags &= ~TCQ_F_OFFLOADED; + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return 0; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, + &hw_stats); + if (err == -EOPNOTSUPP) + return 0; + + if (!err) + sch->flags |= TCQ_F_OFFLOADED; + + return err; } static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -225,10 +286,15 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_prio_qopt opt; + int err; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); + err = prio_dump_offload(sch); + if (err) + goto nla_put_failure; + if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; @@ -240,7 +306,7 @@ nla_put_failure: } static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; @@ -327,7 +393,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } -static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct prio_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6962b37a3ad3..bb1a9c11fc54 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -402,7 +402,8 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight, } static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)*arg; @@ -479,8 +480,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->common.classid = classid; cl->deficit = lmax; - cl->qdisc = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, classid); + cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + classid, NULL); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; @@ -564,7 +565,8 @@ static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid) return (unsigned long)qfq_find_class(sch, classid); } -static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); @@ -593,13 +595,14 @@ static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg) } static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old) + struct Qdisc *new, struct Qdisc **old, + struct netlink_ext_ack *extack) { struct qfq_class *cl = (struct qfq_class *)arg; if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, cl->common.classid); + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + cl->common.classid, NULL); if (new == NULL) new = &noop_qdisc; } @@ -1413,14 +1416,15 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) qfq_deactivate_class(q, cl); } -static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_group *grp; int i, j, err; u32 max_cl_shift, maxbudg_shift, max_classes; - err = tcf_block_get(&q->block, &q->filter_list, sch); + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index f0747eb87dc4..16644b3d2362 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -157,7 +157,6 @@ static int red_offload(struct Qdisc *sch, bool enable) .handle = sch->handle, .parent = sch->parent, }; - int err; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; @@ -168,18 +167,12 @@ static int red_offload(struct Qdisc *sch, bool enable) opt.set.max = q->parms.qth_max >> q->parms.Wlog; opt.set.probability = q->parms.max_P; opt.set.is_ecn = red_use_ecn(q); + opt.set.qstats = &sch->qstats; } else { opt.command = TC_RED_DESTROY; } - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); - - if (!err && enable) - sch->flags |= TCQ_F_OFFLOADED; - else - sch->flags &= ~TCQ_F_OFFLOADED; - - return err; + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); } static void red_destroy(struct Qdisc *sch) @@ -197,7 +190,8 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { [TCA_RED_MAX_P] = { .type = NLA_U32 }, }; -static int red_change(struct Qdisc *sch, struct nlattr *opt) +static int red_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct red_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_RED_MAX + 1]; @@ -224,7 +218,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; if (ctl->limit > 0) { - child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit, + extack); if (IS_ERR(child)) return PTR_ERR(child); } @@ -272,14 +267,15 @@ static inline void red_adaptative_timer(struct timer_list *t) spin_unlock(root_lock); } -static int red_init(struct Qdisc *sch, struct nlattr *opt) +static int red_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct red_sched_data *q = qdisc_priv(sch); q->qdisc = &noop_qdisc; q->sch = sch; timer_setup(&q->adapt_timer, red_adaptative_timer, 0); - return red_change(sch, opt); + return red_change(sch, opt, extack); } static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt) @@ -294,12 +290,22 @@ static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt) .stats.qstats = &sch->qstats, }, }; + int err; - if (!(sch->flags & TCQ_F_OFFLOADED)) + sch->flags &= ~TCQ_F_OFFLOADED; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return 0; - return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, - &hw_stats); + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, + &hw_stats); + if (err == -EOPNOTSUPP) + return 0; + + if (!err) + sch->flags |= TCQ_F_OFFLOADED; + + return err; } static int red_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -317,7 +323,6 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) }; int err; - sch->qstats.backlog = q->qdisc->qstats.backlog; err = red_dump_offload_stats(sch, &opt); if (err) goto nla_put_failure; @@ -339,32 +344,24 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct red_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - struct tc_red_xstats st = { - .early = q->stats.prob_drop + q->stats.forced_drop, - .pdrop = q->stats.pdrop, - .other = q->stats.other, - .marked = q->stats.prob_mark + q->stats.forced_mark, - }; + struct tc_red_xstats st = {0}; if (sch->flags & TCQ_F_OFFLOADED) { - struct red_stats hw_stats = {0}; struct tc_red_qopt_offload hw_stats_request = { .command = TC_RED_XSTATS, .handle = sch->handle, .parent = sch->parent, { - .xstats = &hw_stats, + .xstats = &q->stats, }, }; - if (!dev->netdev_ops->ndo_setup_tc(dev, - TC_SETUP_QDISC_RED, - &hw_stats_request)) { - st.early += hw_stats.prob_drop + hw_stats.forced_drop; - st.pdrop += hw_stats.pdrop; - st.other += hw_stats.other; - st.marked += hw_stats.prob_mark + hw_stats.forced_mark; - } + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, + &hw_stats_request); } + st.early = q->stats.prob_drop + q->stats.forced_drop; + st.pdrop = q->stats.pdrop; + st.other = q->stats.other; + st.marked = q->stats.prob_mark + q->stats.forced_mark; return gnet_stats_copy_app(d, &st, sizeof(st)); } @@ -380,7 +377,7 @@ static int red_dump_class(struct Qdisc *sch, unsigned long cl, } static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct red_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0678debdd856..7cbdad8419b7 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -488,7 +488,8 @@ static const struct tc_sfb_qopt sfb_default_ops = { .penalty_burst = 20, }; -static int sfb_change(struct Qdisc *sch, struct nlattr *opt) +static int sfb_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct sfb_sched_data *q = qdisc_priv(sch); struct Qdisc *child; @@ -512,7 +513,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) if (limit == 0) limit = qdisc_dev(sch)->tx_queue_len; - child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit); + child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit, extack); if (IS_ERR(child)) return PTR_ERR(child); @@ -549,17 +550,18 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static int sfb_init(struct Qdisc *sch, struct nlattr *opt) +static int sfb_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct sfb_sched_data *q = qdisc_priv(sch); int err; - err = tcf_block_get(&q->block, &q->filter_list, sch); + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; q->qdisc = &noop_qdisc; - return sfb_change(sch, opt); + return sfb_change(sch, opt, extack); } static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -615,7 +617,7 @@ static int sfb_dump_class(struct Qdisc *sch, unsigned long cl, } static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct sfb_sched_data *q = qdisc_priv(sch); @@ -643,7 +645,8 @@ static void sfb_unbind(struct Qdisc *sch, unsigned long arg) } static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, + struct netlink_ext_ack *extack) { return -ENOSYS; } @@ -665,7 +668,8 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct sfb_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 930e5bd26d3d..2f2678197760 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -721,7 +721,8 @@ static void sfq_destroy(struct Qdisc *sch) kfree(q->red_parms); } -static int sfq_init(struct Qdisc *sch, struct nlattr *opt) +static int sfq_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct sfq_sched_data *q = qdisc_priv(sch); int i; @@ -730,7 +731,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->sch = sch; timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); - err = tcf_block_get(&q->block, &q->filter_list, sch); + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; @@ -836,7 +837,8 @@ static void sfq_unbind(struct Qdisc *q, unsigned long cl) { } -static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct sfq_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 120f4f365967..83e76d046993 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -302,7 +302,8 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PBURST] = { .type = NLA_U32 }, }; -static int tbf_change(struct Qdisc *sch, struct nlattr *opt) +static int tbf_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { int err; struct tbf_sched_data *q = qdisc_priv(sch); @@ -326,11 +327,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) qopt = nla_data(tb[TCA_TBF_PARMS]); if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, - tb[TCA_TBF_RTAB])); + tb[TCA_TBF_RTAB], + NULL)); if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, - tb[TCA_TBF_PTAB])); + tb[TCA_TBF_PTAB], + NULL)); buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); @@ -383,7 +386,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) if (err) goto done; } else if (qopt->limit > 0) { - child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit, + extack); if (IS_ERR(child)) { err = PTR_ERR(child); goto done; @@ -421,19 +425,20 @@ done: return err; } -static int tbf_init(struct Qdisc *sch, struct nlattr *opt) +static int tbf_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; - if (opt == NULL) + if (!opt) return -EINVAL; q->t_c = ktime_get_ns(); - return tbf_change(sch, opt); + return tbf_change(sch, opt, extack); } static void tbf_destroy(struct Qdisc *sch) @@ -494,7 +499,7 @@ static int tbf_dump_class(struct Qdisc *sch, unsigned long cl, } static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { struct tbf_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 9fe6b427afed..93f04cf5cac1 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -167,7 +167,8 @@ teql_destroy(struct Qdisc *sch) } } -static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) +static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct teql_master *m = (struct teql_master *)sch->ops; diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index d9c04dc1b3f3..c740b189d4ba 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -37,18 +37,6 @@ menuconfig IP_SCTP if IP_SCTP -config NET_SCTPPROBE - tristate "SCTP: Association probing" - depends on PROC_FS && KPROBES - ---help--- - This module allows for capturing the changes to SCTP association - state in response to incoming packets. It is used for debugging - SCTP congestion control algorithms. If you don't understand - what was just said, you don't need it: say N. - - To compile this code as a module, choose M here: the - module will be called sctp_probe. - config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" depends on PROC_FS diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 1ca84a288443..6776582ec449 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -4,7 +4,6 @@ # obj-$(CONFIG_IP_SCTP) += sctp.o -obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ @@ -14,9 +13,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ offload.o stream_sched.o stream_sched_prio.o \ - stream_sched_rr.o - -sctp_probe-y := probe.o + stream_sched_rr.o stream_interleave.o sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_PROC_FS) += proc.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 69394f4d6091..837806dd5799 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -861,7 +861,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, spc_state, error, GFP_ATOMIC); if (event) - sctp_ulpq_tail_event(&asoc->ulpq, event); + asoc->stream.si->enqueue_event(&asoc->ulpq, event); } /* Select new active and retran paths. */ diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7f8baa48e7c2..991a530c6b31 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -124,7 +124,7 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent, error, GFP_ATOMIC); if (ev) - sctp_ulpq_tail_event(&asoc->ulpq, ev); + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } sctp_chunk_put(chunk); @@ -191,7 +191,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, */ max_data = asoc->pathmtu - sctp_sk(asoc->base.sk)->pf->af->net_header_len - - sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); + sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream); max_data = SCTP_TRUNC4(max_data); /* If the the peer requested that we authenticate DATA chunks @@ -264,8 +264,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, frag |= SCTP_DATA_SACK_IMM; } - chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, - 0, GFP_KERNEL); + chunk = asoc->stream.si->make_datafrag(asoc, sinfo, len, frag, + GFP_KERNEL); if (!chunk) { err = -ENOMEM; goto errout; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index ee1e601a0b11..8b3146816519 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -232,7 +232,7 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) { ep->base.dead = true; - ep->base.sk->sk_state = SCTP_SS_CLOSED; + inet_sk_set_state(ep->base.sk, SCTP_SS_CLOSED); /* Unlink this endpoint, so we can't find it again! */ sctp_unhash_endpoint(ep); diff --git a/net/sctp/output.c b/net/sctp/output.c index 4a865cd06d76..01a26ee051e3 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -313,6 +313,7 @@ static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet, /* We believe that this chunk is OK to add to the packet */ switch (chunk->chunk_hdr->type) { case SCTP_CID_DATA: + case SCTP_CID_I_DATA: /* Account for the data being in the packet */ sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. */ @@ -724,7 +725,7 @@ static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet, * or delay in hopes of bundling a full sized packet. */ if (chunk->skb->len + q->out_qlen > transport->pathmtu - - packet->overhead - sizeof(struct sctp_data_chunk) - 4) + packet->overhead - sctp_datachk_len(&chunk->asoc->stream) - 4) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; @@ -759,7 +760,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet, asoc->peer.rwnd = rwnd; sctp_chunk_assign_tsn(chunk); - sctp_chunk_assign_ssn(chunk); + asoc->stream.si->assign_number(chunk); } static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c4ec99b20150..f211b3db6a35 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -67,8 +67,6 @@ static void sctp_mark_missing(struct sctp_outq *q, __u32 highest_new_tsn, int count_of_newacks); -static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); - static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); /* Add data to the front of the queue. */ @@ -591,7 +589,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, * following the procedures outlined in C1 - C5. */ if (reason == SCTP_RTXR_T3_RTX) - sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); + q->asoc->stream.si->generate_ftsn(q, q->asoc->ctsn_ack_point); /* Flush the queues only on timeout, since fast_rtx is only * triggered during sack processing and the queue @@ -942,6 +940,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) case SCTP_CID_ECN_ECNE: case SCTP_CID_ASCONF: case SCTP_CID_FWD_TSN: + case SCTP_CID_I_FWD_TSN: case SCTP_CID_RECONF: status = sctp_packet_transmit_chunk(packet, chunk, one_packet, gfp); @@ -956,7 +955,8 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) * sender MUST assure that at least one T3-rtx * timer is running. */ - if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { + if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN || + chunk->chunk_hdr->type == SCTP_CID_I_FWD_TSN) { sctp_transport_reset_t3_rtx(transport); transport->last_time_sent = jiffies; } @@ -1372,7 +1372,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) asoc->peer.rwnd = sack_a_rwnd; - sctp_generate_fwdtsn(q, sack_ctsn); + asoc->stream.si->generate_ftsn(q, sack_ctsn); pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn); pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, " @@ -1795,7 +1795,7 @@ static inline int sctp_get_skip_pos(struct sctp_fwdtsn_skip *skiplist, } /* Create and add a fwdtsn chunk to the outq's control queue if needed. */ -static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) +void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) { struct sctp_association *asoc = q->asoc; struct sctp_chunk *ftsn_chunk = NULL; diff --git a/net/sctp/probe.c b/net/sctp/probe.c deleted file mode 100644 index 1280f85a598d..000000000000 --- a/net/sctp/probe.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * sctp_probe - Observe the SCTP flow with kprobes. - * - * The idea for this came from Werner Almesberger's umlsim - * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> - * - * Modified for SCTP from Stephen Hemminger's code - * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/kprobes.h> -#include <linux/socket.h> -#include <linux/sctp.h> -#include <linux/proc_fs.h> -#include <linux/vmalloc.h> -#include <linux/module.h> -#include <linux/kfifo.h> -#include <linux/time.h> -#include <net/net_namespace.h> - -#include <net/sctp/sctp.h> -#include <net/sctp/sm.h> - -MODULE_SOFTDEP("pre: sctp"); -MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>"); -MODULE_DESCRIPTION("SCTP snooper"); -MODULE_LICENSE("GPL"); - -static int port __read_mostly = 0; -MODULE_PARM_DESC(port, "Port to match (0=all)"); -module_param(port, int, 0); - -static unsigned int fwmark __read_mostly = 0; -MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); -module_param(fwmark, uint, 0); - -static int bufsize __read_mostly = 64 * 1024; -MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); -module_param(bufsize, int, 0); - -static int full __read_mostly = 1; -MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); -module_param(full, int, 0); - -static const char procname[] = "sctpprobe"; - -static struct { - struct kfifo fifo; - spinlock_t lock; - wait_queue_head_t wait; - struct timespec64 tstart; -} sctpw; - -static __printf(1, 2) void printl(const char *fmt, ...) -{ - va_list args; - int len; - char tbuf[256]; - - va_start(args, fmt); - len = vscnprintf(tbuf, sizeof(tbuf), fmt, args); - va_end(args); - - kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); - wake_up(&sctpw.wait); -} - -static int sctpprobe_open(struct inode *inode, struct file *file) -{ - kfifo_reset(&sctpw.fifo); - ktime_get_ts64(&sctpw.tstart); - - return 0; -} - -static ssize_t sctpprobe_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - int error = 0, cnt = 0; - unsigned char *tbuf; - - if (!buf) - return -EINVAL; - - if (len == 0) - return 0; - - tbuf = vmalloc(len); - if (!tbuf) - return -ENOMEM; - - error = wait_event_interruptible(sctpw.wait, - kfifo_len(&sctpw.fifo) != 0); - if (error) - goto out_free; - - cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); - error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; - -out_free: - vfree(tbuf); - - return error ? error : cnt; -} - -static const struct file_operations sctpprobe_fops = { - .owner = THIS_MODULE, - .open = sctpprobe_open, - .read = sctpprobe_read, - .llseek = noop_llseek, -}; - -static enum sctp_disposition jsctp_sf_eat_sack( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *commands) -{ - struct sctp_chunk *chunk = arg; - struct sk_buff *skb = chunk->skb; - struct sctp_transport *sp; - static __u32 lcwnd = 0; - struct timespec64 now; - - sp = asoc->peer.primary_path; - - if (((port == 0 && fwmark == 0) || - asoc->peer.port == port || - ep->base.bind_addr.port == port || - (fwmark > 0 && skb->mark == fwmark)) && - (full || sp->cwnd != lcwnd)) { - lcwnd = sp->cwnd; - - ktime_get_ts64(&now); - now = timespec64_sub(now, sctpw.tstart); - - printl("%lu.%06lu ", (unsigned long) now.tv_sec, - (unsigned long) now.tv_nsec / NSEC_PER_USEC); - - printl("%p %5d %5d %5d %8d %5d ", asoc, - ep->base.bind_addr.port, asoc->peer.port, - asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data); - - list_for_each_entry(sp, &asoc->peer.transport_addr_list, - transports) { - if (sp == asoc->peer.primary_path) - printl("*"); - - printl("%pISc %2u %8u %8u %8u %8u %8u ", - &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh, - sp->flight_size, sp->partial_bytes_acked, - sp->pathmtu); - } - printl("\n"); - } - - jprobe_return(); - return 0; -} - -static struct jprobe sctp_recv_probe = { - .kp = { - .symbol_name = "sctp_sf_eat_sack_6_2", - }, - .entry = jsctp_sf_eat_sack, -}; - -static __init int sctp_setup_jprobe(void) -{ - int ret = register_jprobe(&sctp_recv_probe); - - if (ret) { - if (request_module("sctp")) - goto out; - ret = register_jprobe(&sctp_recv_probe); - } - -out: - return ret; -} - -static __init int sctpprobe_init(void) -{ - int ret = -ENOMEM; - - /* Warning: if the function signature of sctp_sf_eat_sack_6_2, - * has been changed, you also have to change the signature of - * jsctp_sf_eat_sack, otherwise you end up right here! - */ - BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2, - jsctp_sf_eat_sack) == 0); - - init_waitqueue_head(&sctpw.wait); - spin_lock_init(&sctpw.lock); - if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) - return ret; - - if (!proc_create(procname, S_IRUSR, init_net.proc_net, - &sctpprobe_fops)) - goto free_kfifo; - - ret = sctp_setup_jprobe(); - if (ret) - goto remove_proc; - - pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n", - port, fwmark, bufsize); - return 0; - -remove_proc: - remove_proc_entry(procname, init_net.proc_net); -free_kfifo: - kfifo_free(&sctpw.fifo); - return ret; -} - -static __exit void sctpprobe_exit(void) -{ - kfifo_free(&sctpw.fifo); - remove_proc_entry(procname, init_net.proc_net); - unregister_jprobe(&sctp_recv_probe); -} - -module_init(sctpprobe_init); -module_exit(sctpprobe_exit); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 26b4be6b4172..537545ebcb0e 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -95,7 +95,6 @@ static int sctp_snmp_seq_open(struct inode *inode, struct file *file) } static const struct file_operations sctp_snmp_seq_fops = { - .owner = THIS_MODULE, .open = sctp_snmp_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -288,12 +287,8 @@ struct sctp_ht_iter { static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; - int err = sctp_transport_walk_start(&iter->hti); - if (err) { - iter->start_fail = 1; - return ERR_PTR(err); - } + sctp_transport_walk_start(&iter->hti); iter->start_fail = 0; return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9bf575f2e8ed..793b05ec692b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -228,7 +228,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, struct sctp_inithdr init; union sctp_params addrs; struct sctp_sock *sp; - __u8 extensions[4]; + __u8 extensions[5]; size_t chunksize; __be16 types[2]; int num_ext = 0; @@ -278,6 +278,11 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, if (sp->adaptation_ind) chunksize += sizeof(aiparam); + if (sp->strm_interleave) { + extensions[num_ext] = SCTP_CID_I_DATA; + num_ext += 1; + } + chunksize += vparam_len; /* Account for AUTH related parameters */ @@ -392,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, struct sctp_inithdr initack; union sctp_params addrs; struct sctp_sock *sp; - __u8 extensions[4]; + __u8 extensions[5]; size_t chunksize; int num_ext = 0; int cookie_len; @@ -442,6 +447,11 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, if (sp->adaptation_ind) chunksize += sizeof(aiparam); + if (asoc->intl_enable) { + extensions[num_ext] = SCTP_CID_I_DATA; + num_ext += 1; + } + if (asoc->peer.auth_capable) { auth_random = (struct sctp_paramhdr *)asoc->c.auth_random; chunksize += ntohs(auth_random->length); @@ -711,38 +721,31 @@ nodata: /* Make a DATA chunk for the given association from the provided * parameters. However, do not populate the data payload. */ -struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, +struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, const struct sctp_sndrcvinfo *sinfo, - int data_len, __u8 flags, __u16 ssn, - gfp_t gfp) + int len, __u8 flags, gfp_t gfp) { struct sctp_chunk *retval; struct sctp_datahdr dp; - int chunk_len; /* We assign the TSN as LATE as possible, not here when * creating the chunk. */ - dp.tsn = 0; + memset(&dp, 0, sizeof(dp)); + dp.ppid = sinfo->sinfo_ppid; dp.stream = htons(sinfo->sinfo_stream); - dp.ppid = sinfo->sinfo_ppid; /* Set the flags for an unordered send. */ - if (sinfo->sinfo_flags & SCTP_UNORDERED) { + if (sinfo->sinfo_flags & SCTP_UNORDERED) flags |= SCTP_DATA_UNORDERED; - dp.ssn = 0; - } else - dp.ssn = htons(ssn); - chunk_len = sizeof(dp) + data_len; - retval = sctp_make_data(asoc, flags, chunk_len, gfp); + retval = sctp_make_data(asoc, flags, sizeof(dp) + len, gfp); if (!retval) - goto nodata; + return NULL; retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp); memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo)); -nodata: return retval; } @@ -1273,7 +1276,6 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) struct sctp_authhdr auth_hdr; struct sctp_hmac *hmac_desc; struct sctp_chunk *retval; - __u8 *hmac; /* Get the first hmac that the peer told us to use */ hmac_desc = sctp_auth_asoc_get_hmac(asoc); @@ -1292,7 +1294,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr), &auth_hdr); - hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len); + skb_put_zero(retval->skb, hmac_desc->hmac_len); /* Adjust the chunk header to include the empty MAC */ retval->chunk_hdr->length = @@ -1415,6 +1417,12 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen, gfp); } +struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc, + __u8 flags, int paylen, gfp_t gfp) +{ + return _sctp_make_chunk(asoc, SCTP_CID_I_DATA, flags, paylen, gfp); +} + static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen, gfp_t gfp) @@ -2032,6 +2040,10 @@ static void sctp_process_ext_param(struct sctp_association *asoc, if (net->sctp.addip_enable) asoc->peer.asconf_capable = 1; break; + case SCTP_CID_I_DATA: + if (sctp_sk(asoc->base.sk)->strm_interleave) + asoc->intl_enable = 1; + break; default: break; } @@ -3523,6 +3535,30 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, return retval; } +struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc, + __u32 new_cum_tsn, size_t nstreams, + struct sctp_ifwdtsn_skip *skiplist) +{ + struct sctp_chunk *retval = NULL; + struct sctp_ifwdtsn_hdr ftsn_hdr; + size_t hint; + + hint = (nstreams + 1) * sizeof(__u32); + + retval = sctp_make_control(asoc, SCTP_CID_I_FWD_TSN, 0, hint, + GFP_ATOMIC); + if (!retval) + return NULL; + + ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn); + retval->subh.ifwdtsn_hdr = + sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr); + + sctp_addto_chunk(retval, nstreams * sizeof(skiplist[0]), skiplist); + + return retval; +} + /* RE-CONFIG 3.1 (RE-CONFIG chunk) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index df94d77401e7..b71e7fb0a20a 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -632,7 +632,7 @@ static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands, struct sctp_chunk *abort; /* Cancel any partial delivery in progress. */ - sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); + asoc->stream.si->abort_pd(&asoc->ulpq, GFP_ATOMIC); if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT) event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST, @@ -878,12 +878,12 @@ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds, * successfully completed a connect() call. */ if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) - sk->sk_state = SCTP_SS_ESTABLISHED; + inet_sk_set_state(sk, SCTP_SS_ESTABLISHED); /* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */ if (sctp_state(asoc, SHUTDOWN_RECEIVED) && sctp_sstate(sk, ESTABLISHED)) { - sk->sk_state = SCTP_SS_CLOSING; + inet_sk_set_state(sk, SCTP_SS_CLOSING); sk->sk_shutdown |= RCV_SHUTDOWN; } } @@ -972,7 +972,7 @@ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds, if (!ev) return; - sctp_ulpq_tail_event(&asoc->ulpq, ev); + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); switch (err_hdr->cause) { case SCTP_ERROR_UNKNOWN_CHUNK: @@ -1007,18 +1007,6 @@ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds, } } -/* Process variable FWDTSN chunk information. */ -static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq, - struct sctp_chunk *chunk) -{ - struct sctp_fwdtsn_skip *skip; - - /* Walk through all the skipped SSNs */ - sctp_walk_fwdtsn(skip, chunk) { - sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn)); - } -} - /* Helper function to remove the association non-primary peer * transports. */ @@ -1058,7 +1046,7 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands, asoc->c.sinit_max_instreams, NULL, GFP_ATOMIC); if (ev) - sctp_ulpq_tail_event(&asoc->ulpq, ev); + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } /* Helper function to generate an adaptation indication event */ @@ -1070,7 +1058,7 @@ static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands, ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); if (ev) - sctp_ulpq_tail_event(&asoc->ulpq, ev); + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } @@ -1368,18 +1356,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; case SCTP_CMD_REPORT_FWDTSN: - /* Move the Cumulattive TSN Ack ahead. */ - sctp_tsnmap_skip(&asoc->peer.tsn_map, cmd->obj.u32); - - /* purge the fragmentation queue */ - sctp_ulpq_reasm_flushtsn(&asoc->ulpq, cmd->obj.u32); - - /* Abort any in progress partial delivery. */ - sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); + asoc->stream.si->report_ftsn(&asoc->ulpq, cmd->obj.u32); break; case SCTP_CMD_PROCESS_FWDTSN: - sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.chunk); + asoc->stream.si->handle_ftsn(&asoc->ulpq, + cmd->obj.chunk); break; case SCTP_CMD_GEN_SACK: @@ -1483,8 +1465,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n", __func__, cmd->obj.chunk, &asoc->ulpq); - sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk, - GFP_ATOMIC); + asoc->stream.si->ulpevent_data(&asoc->ulpq, + cmd->obj.chunk, + GFP_ATOMIC); break; case SCTP_CMD_EVENT_ULP: @@ -1492,7 +1475,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n", __func__, cmd->obj.ulpevent, &asoc->ulpq); - sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent); + asoc->stream.si->enqueue_event(&asoc->ulpq, + cmd->obj.ulpevent); break; case SCTP_CMD_REPLY: @@ -1729,12 +1713,13 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; case SCTP_CMD_PART_DELIVER: - sctp_ulpq_partial_delivery(&asoc->ulpq, GFP_ATOMIC); + asoc->stream.si->start_pd(&asoc->ulpq, GFP_ATOMIC); break; case SCTP_CMD_RENEGE: - sctp_ulpq_renege(&asoc->ulpq, cmd->obj.chunk, - GFP_ATOMIC); + asoc->stream.si->renege_events(&asoc->ulpq, + cmd->obj.chunk, + GFP_ATOMIC); break; case SCTP_CMD_SETUP_T4: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8f8ccded13e4..eb7905ffe5f2 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -59,6 +59,9 @@ #include <net/sctp/sm.h> #include <net/sctp/structs.h> +#define CREATE_TRACE_POINTS +#include <trace/events/sctp.h> + static struct sctp_packet *sctp_abort_pkt_new( struct net *net, const struct sctp_endpoint *ep, @@ -3013,7 +3016,7 @@ enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk))) + if (!sctp_chunk_length_valid(chunk, sctp_datachk_len(&asoc->stream))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3034,7 +3037,7 @@ enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net, case SCTP_IERROR_PROTO_VIOLATION: return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, - sizeof(struct sctp_datahdr)); + sctp_datahdr_len(&asoc->stream)); default: BUG(); } @@ -3133,7 +3136,7 @@ enum sctp_disposition sctp_sf_eat_data_fast_4_4( return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk))) + if (!sctp_chunk_length_valid(chunk, sctp_datachk_len(&asoc->stream))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3150,7 +3153,7 @@ enum sctp_disposition sctp_sf_eat_data_fast_4_4( case SCTP_IERROR_PROTO_VIOLATION: return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, - sizeof(struct sctp_datahdr)); + sctp_datahdr_len(&asoc->stream)); default: BUG(); } @@ -3219,6 +3222,8 @@ enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net, struct sctp_sackhdr *sackh; __u32 ctsn; + trace_sctp_probe(ep, asoc, chunk); + if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -3957,7 +3962,6 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net, { struct sctp_fwdtsn_hdr *fwdtsn_hdr; struct sctp_chunk *chunk = arg; - struct sctp_fwdtsn_skip *skip; __u16 len; __u32 tsn; @@ -3971,7 +3975,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net, return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the FORWARD_TSN chunk has valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) + if (!sctp_chunk_length_valid(chunk, sctp_ftsnchk_len(&asoc->stream))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3990,14 +3994,11 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net, if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0) goto discard_noforce; - /* Silently discard the chunk if stream-id is not valid */ - sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->stream.incnt) - goto discard_noforce; - } + if (!asoc->stream.si->validate_ftsn(chunk)) + goto discard_noforce; sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn)); - if (len > sizeof(struct sctp_fwdtsn_hdr)) + if (len > sctp_ftsnhdr_len(&asoc->stream)) sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN, SCTP_CHUNK(chunk)); @@ -4028,7 +4029,6 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast( { struct sctp_fwdtsn_hdr *fwdtsn_hdr; struct sctp_chunk *chunk = arg; - struct sctp_fwdtsn_skip *skip; __u16 len; __u32 tsn; @@ -4042,7 +4042,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast( return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the FORWARD_TSN chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) + if (!sctp_chunk_length_valid(chunk, sctp_ftsnchk_len(&asoc->stream))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4061,14 +4061,11 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast( if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0) goto gen_shutdown; - /* Silently discard the chunk if stream-id is not valid */ - sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->stream.incnt) - goto gen_shutdown; - } + if (!asoc->stream.si->validate_ftsn(chunk)) + goto gen_shutdown; sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn)); - if (len > sizeof(struct sctp_fwdtsn_hdr)) + if (len > sctp_ftsnhdr_len(&asoc->stream)) sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN, SCTP_CHUNK(chunk)); @@ -6244,14 +6241,12 @@ static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *err; enum sctp_verb deliver; size_t datalen; - u8 ordered = 0; - u16 ssn, sid; __u32 tsn; int tmp; data_hdr = (struct sctp_datahdr *)chunk->skb->data; chunk->subh.data_hdr = data_hdr; - skb_pull(chunk->skb, sizeof(*data_hdr)); + skb_pull(chunk->skb, sctp_datahdr_len(&asoc->stream)); tsn = ntohl(data_hdr->tsn); pr_debug("%s: TSN 0x%x\n", __func__, tsn); @@ -6299,7 +6294,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * Actually, allow a little bit of overflow (up to a MTU). */ datalen = ntohs(chunk->chunk_hdr->length); - datalen -= sizeof(struct sctp_data_chunk); + datalen -= sctp_datachk_len(&asoc->stream); deliver = SCTP_CMD_CHUNK_ULP; @@ -6394,7 +6389,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS); if (chunk->asoc) chunk->asoc->stats.iodchunks++; - ordered = 1; } /* RFC 2960 6.5 Stream Identifier and Stream Sequence Number @@ -6405,8 +6399,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * with cause set to "Invalid Stream Identifier" (See Section 3.3.10) * and discard the DATA chunk. */ - sid = ntohs(data_hdr->stream); - if (sid >= asoc->stream.incnt) { + if (ntohs(data_hdr->stream) >= asoc->stream.incnt) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); @@ -6427,8 +6420,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * SSN is smaller then the next expected one. If it is, it wrapped * and is invalid. */ - ssn = ntohs(data_hdr->ssn); - if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->stream, in, sid))) + if (!asoc->stream.si->validate_data(chunk)) return SCTP_IERROR_PROTO_VIOLATION; /* Send the data up to the user. Note: Schedule the diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 79b6bee5b768..691d9dc620e3 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -985,11 +985,14 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup( if (state > SCTP_STATE_MAX) return &bug; + if (cid == SCTP_CID_I_DATA) + cid = SCTP_CID_DATA; + if (cid <= SCTP_CID_BASE_MAX) return &chunk_event_table[cid][state]; if (net->sctp.prsctp_enable) { - if (cid == SCTP_CID_FWD_TSN) + if (cid == SCTP_CID_FWD_TSN || cid == SCTP_CID_I_FWD_TSN) return &prsctp_chunk_event_table[0][state]; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 737e551fbf67..356e387f82e7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -201,6 +201,22 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, cb(chunk); } +static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk, + void (*cb)(struct sk_buff *, struct sock *)) + +{ + struct sk_buff *skb, *tmp; + + sctp_skb_for_each(skb, &asoc->ulpq.lobby, tmp) + cb(skb, sk); + + sctp_skb_for_each(skb, &asoc->ulpq.reasm, tmp) + cb(skb, sk); + + sctp_skb_for_each(skb, &asoc->ulpq.reasm_uo, tmp) + cb(skb, sk); +} + /* Verify that this is a valid address. */ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, int len) @@ -1489,7 +1505,7 @@ static void sctp_close(struct sock *sk, long timeout) lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sk->sk_shutdown = SHUTDOWN_MASK; - sk->sk_state = SCTP_SS_CLOSING; + inet_sk_set_state(sk, SCTP_SS_CLOSING); ep = sctp_sk(sk)->ep; @@ -1515,6 +1531,7 @@ static void sctp_close(struct sock *sk, long timeout) if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) || !skb_queue_empty(&asoc->ulpq.reasm) || + !skb_queue_empty(&asoc->ulpq.reasm_uo) || (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { struct sctp_chunk *chunk; @@ -1969,7 +1986,20 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) if (err < 0) goto out_free; - wait_connect = true; + /* If stream interleave is enabled, wait_connect has to be + * done earlier than data enqueue, as it needs to make data + * or idata according to asoc->intl_enable which is set + * after connection is done. + */ + if (sctp_sk(asoc->base.sk)->strm_interleave) { + timeo = sock_sndtimeo(sk, 0); + err = sctp_wait_for_connect(asoc, &timeo); + if (err) + goto out_unlock; + } else { + wait_connect = true; + } + pr_debug("%s: we associated primitively\n", __func__); } @@ -2248,7 +2278,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, if (!event) return -ENOMEM; - sctp_ulpq_tail_event(&asoc->ulpq, event); + asoc->stream.si->enqueue_event(&asoc->ulpq, event); } } @@ -3147,7 +3177,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned if (val == 0) { val = asoc->pathmtu - sp->pf->af->net_header_len; val -= sizeof(struct sctphdr) + - sizeof(struct sctp_data_chunk); + sctp_datachk_len(&asoc->stream); } asoc->user_frag = val; asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); @@ -3317,7 +3347,10 @@ static int sctp_setsockopt_fragment_interleave(struct sock *sk, if (get_user(val, (int __user *)optval)) return -EFAULT; - sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1; + sctp_sk(sk)->frag_interleave = !!val; + + if (!sctp_sk(sk)->frag_interleave) + sctp_sk(sk)->strm_interleave = 0; return 0; } @@ -4000,6 +4033,40 @@ out: return retval; } +static int sctp_setsockopt_interleaving_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_sock *sp = sctp_sk(sk); + struct net *net = sock_net(sk); + struct sctp_assoc_value params; + int retval = -EINVAL; + + if (optlen < sizeof(params)) + goto out; + + optlen = sizeof(params); + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + if (params.assoc_id) + goto out; + + if (!net->sctp.intl_enable || !sp->frag_interleave) { + retval = -EPERM; + goto out; + } + + sp->strm_interleave = !!params.assoc_value; + + retval = 0; + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4187,6 +4254,10 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_STREAM_SCHEDULER_VALUE: retval = sctp_setsockopt_scheduler_value(sk, optval, optlen); break; + case SCTP_INTERLEAVING_SUPPORTED: + retval = sctp_setsockopt_interleaving_supported(sk, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -4563,7 +4634,7 @@ static void sctp_shutdown(struct sock *sk, int how) if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { struct sctp_association *asoc; - sk->sk_state = SCTP_SS_CLOSING; + inet_sk_set_state(sk, SCTP_SS_CLOSING); asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); sctp_primitive_SHUTDOWN(net, asoc, NULL); @@ -4657,20 +4728,11 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, EXPORT_SYMBOL_GPL(sctp_get_sctp_info); /* use callback to avoid exporting the core structure */ -int sctp_transport_walk_start(struct rhashtable_iter *iter) +void sctp_transport_walk_start(struct rhashtable_iter *iter) { - int err; - rhltable_walk_enter(&sctp_transport_hashtable, iter); - err = rhashtable_walk_start(iter); - if (err && err != -EAGAIN) { - rhashtable_walk_stop(iter); - rhashtable_walk_exit(iter); - return err; - } - - return 0; + rhashtable_walk_start(iter); } void sctp_transport_walk_stop(struct rhashtable_iter *iter) @@ -4764,9 +4826,8 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), int ret; again: - ret = sctp_transport_walk_start(&hti); - if (ret) - return ret; + ret = 0; + sctp_transport_walk_start(&hti); tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { @@ -6965,6 +7026,47 @@ out: return retval; } +static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + params.assoc_value = asoc->intl_enable; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + params.assoc_value = sp->strm_interleave; + } else { + retval = -EINVAL; + goto out; + } + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -7155,6 +7257,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_scheduler_value(sk, len, optval, optlen); break; + case SCTP_INTERLEAVING_SUPPORTED: + retval = sctp_getsockopt_interleaving_supported(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -7389,13 +7495,13 @@ static int sctp_listen_start(struct sock *sk, int backlog) * sockets. * */ - sk->sk_state = SCTP_SS_LISTENING; + inet_sk_set_state(sk, SCTP_SS_LISTENING); if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; } else { if (sctp_get_port(sk, inet_sk(sk)->inet_num)) { - sk->sk_state = SCTP_SS_CLOSED; + inet_sk_set_state(sk, SCTP_SS_CLOSED); return -EADDRINUSE; } } @@ -8388,11 +8494,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, } - sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp) - sctp_skb_set_owner_r_frag(skb, newsk); - - sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp) - sctp_skb_set_owner_r_frag(skb, newsk); + sctp_for_each_rx_skb(assoc, newsk, sctp_skb_set_owner_r_frag); /* Set the type of socket to indicate that it is peeled off from the * original UDP-style socket or created with the accept() call on a @@ -8418,10 +8520,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * is called, set RCV_SHUTDOWN flag. */ if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) { - newsk->sk_state = SCTP_SS_CLOSED; + inet_sk_set_state(newsk, SCTP_SS_CLOSED); newsk->sk_shutdown |= RCV_SHUTDOWN; } else { - newsk->sk_state = SCTP_SS_ESTABLISHED; + inet_sk_set_state(newsk, SCTP_SS_ESTABLISHED); } release_sock(newsk); diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 524dfeb94c41..cedf672487f9 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -167,6 +167,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, sched->init(stream); in: + sctp_stream_interleave_init(stream); if (!incnt) goto out; @@ -213,11 +214,13 @@ void sctp_stream_clear(struct sctp_stream *stream) { int i; - for (i = 0; i < stream->outcnt; i++) - stream->out[i].ssn = 0; + for (i = 0; i < stream->outcnt; i++) { + stream->out[i].mid = 0; + stream->out[i].mid_uo = 0; + } for (i = 0; i < stream->incnt; i++) - stream->in[i].ssn = 0; + stream->in[i].mid = 0; } void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) @@ -604,10 +607,10 @@ struct sctp_chunk *sctp_process_strreset_outreq( } for (i = 0; i < nums; i++) - stream->in[ntohs(str_p[i])].ssn = 0; + stream->in[ntohs(str_p[i])].mid = 0; } else { for (i = 0; i < stream->incnt; i++) - stream->in[i].ssn = 0; + stream->in[i].mid = 0; } result = SCTP_STRRESET_PERFORMED; @@ -751,8 +754,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( * performed. */ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); - sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); - sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); + asoc->stream.si->report_ftsn(&asoc->ulpq, max_tsn_seen); /* G1: Compute an appropriate value for the Receiver's Next TSN -- the * TSN that the peer should use to send the next DATA chunk. The @@ -781,10 +783,12 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( /* G5: The next expected and outgoing SSNs MUST be reset to 0 for all * incoming and outgoing streams. */ - for (i = 0; i < stream->outcnt; i++) - stream->out[i].ssn = 0; + for (i = 0; i < stream->outcnt; i++) { + stream->out[i].mid = 0; + stream->out[i].mid_uo = 0; + } for (i = 0; i < stream->incnt; i++) - stream->in[i].ssn = 0; + stream->in[i].mid = 0; result = SCTP_STRRESET_PERFORMED; @@ -974,11 +978,15 @@ struct sctp_chunk *sctp_process_strreset_resp( if (result == SCTP_STRRESET_PERFORMED) { if (nums) { - for (i = 0; i < nums; i++) - stream->out[ntohs(str_p[i])].ssn = 0; + for (i = 0; i < nums; i++) { + stream->out[ntohs(str_p[i])].mid = 0; + stream->out[ntohs(str_p[i])].mid_uo = 0; + } } else { - for (i = 0; i < stream->outcnt; i++) - stream->out[i].ssn = 0; + for (i = 0; i < stream->outcnt; i++) { + stream->out[i].mid = 0; + stream->out[i].mid_uo = 0; + } } flags = SCTP_STREAM_RESET_OUTGOING_SSN; @@ -1021,8 +1029,7 @@ struct sctp_chunk *sctp_process_strreset_resp( &asoc->peer.tsn_map); LIST_HEAD(temp); - sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); - sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); + asoc->stream.si->report_ftsn(&asoc->ulpq, mtsn); sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, @@ -1040,10 +1047,12 @@ struct sctp_chunk *sctp_process_strreset_resp( asoc->ctsn_ack_point = asoc->next_tsn - 1; asoc->adv_peer_ack_point = asoc->ctsn_ack_point; - for (i = 0; i < stream->outcnt; i++) - stream->out[i].ssn = 0; + for (i = 0; i < stream->outcnt; i++) { + stream->out[i].mid = 0; + stream->out[i].mid_uo = 0; + } for (i = 0; i < stream->incnt; i++) - stream->in[i].ssn = 0; + stream->in[i].mid = 0; } for (i = 0; i < stream->outcnt; i++) diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c new file mode 100644 index 000000000000..8c7cf8f08711 --- /dev/null +++ b/net/sctp/stream_interleave.c @@ -0,0 +1,1334 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Xin Long <lucien.xin@gmail.com> + */ + +#include <net/busy_poll.h> +#include <net/sctp/sctp.h> +#include <net/sctp/sm.h> +#include <net/sctp/ulpevent.h> +#include <linux/sctp.h> + +static struct sctp_chunk *sctp_make_idatafrag_empty( + const struct sctp_association *asoc, + const struct sctp_sndrcvinfo *sinfo, + int len, __u8 flags, gfp_t gfp) +{ + struct sctp_chunk *retval; + struct sctp_idatahdr dp; + + memset(&dp, 0, sizeof(dp)); + dp.stream = htons(sinfo->sinfo_stream); + + if (sinfo->sinfo_flags & SCTP_UNORDERED) + flags |= SCTP_DATA_UNORDERED; + + retval = sctp_make_idata(asoc, flags, sizeof(dp) + len, gfp); + if (!retval) + return NULL; + + retval->subh.idata_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp); + memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo)); + + return retval; +} + +static void sctp_chunk_assign_mid(struct sctp_chunk *chunk) +{ + struct sctp_stream *stream; + struct sctp_chunk *lchunk; + __u32 cfsn = 0; + __u16 sid; + + if (chunk->has_mid) + return; + + sid = sctp_chunk_stream_no(chunk); + stream = &chunk->asoc->stream; + + list_for_each_entry(lchunk, &chunk->msg->chunks, frag_list) { + struct sctp_idatahdr *hdr; + __u32 mid; + + lchunk->has_mid = 1; + + hdr = lchunk->subh.idata_hdr; + + if (lchunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) + hdr->ppid = lchunk->sinfo.sinfo_ppid; + else + hdr->fsn = htonl(cfsn++); + + if (lchunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) { + mid = lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG ? + sctp_mid_uo_next(stream, out, sid) : + sctp_mid_uo_peek(stream, out, sid); + } else { + mid = lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG ? + sctp_mid_next(stream, out, sid) : + sctp_mid_peek(stream, out, sid); + } + hdr->mid = htonl(mid); + } +} + +static bool sctp_validate_data(struct sctp_chunk *chunk) +{ + const struct sctp_stream *stream; + __u16 sid, ssn; + + if (chunk->chunk_hdr->type != SCTP_CID_DATA) + return false; + + if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) + return true; + + stream = &chunk->asoc->stream; + sid = sctp_chunk_stream_no(chunk); + ssn = ntohs(chunk->subh.data_hdr->ssn); + + return !SSN_lt(ssn, sctp_ssn_peek(stream, in, sid)); +} + +static bool sctp_validate_idata(struct sctp_chunk *chunk) +{ + struct sctp_stream *stream; + __u32 mid; + __u16 sid; + + if (chunk->chunk_hdr->type != SCTP_CID_I_DATA) + return false; + + if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) + return true; + + stream = &chunk->asoc->stream; + sid = sctp_chunk_stream_no(chunk); + mid = ntohl(chunk->subh.idata_hdr->mid); + + return !MID_lt(mid, sctp_mid_peek(stream, in, sid)); +} + +static void sctp_intl_store_reasm(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sctp_ulpevent *cevent; + struct sk_buff *pos; + + pos = skb_peek_tail(&ulpq->reasm); + if (!pos) { + __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); + return; + } + + cevent = sctp_skb2event(pos); + + if (event->stream == cevent->stream && + event->mid == cevent->mid && + (cevent->msg_flags & SCTP_DATA_FIRST_FRAG || + (!(event->msg_flags & SCTP_DATA_FIRST_FRAG) && + event->fsn > cevent->fsn))) { + __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); + return; + } + + if ((event->stream == cevent->stream && + MID_lt(cevent->mid, event->mid)) || + event->stream > cevent->stream) { + __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); + return; + } + + skb_queue_walk(&ulpq->reasm, pos) { + cevent = sctp_skb2event(pos); + + if (event->stream < cevent->stream || + (event->stream == cevent->stream && + MID_lt(event->mid, cevent->mid))) + break; + + if (event->stream == cevent->stream && + event->mid == cevent->mid && + !(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) && + (event->msg_flags & SCTP_DATA_FIRST_FRAG || + event->fsn < cevent->fsn)) + break; + } + + __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event)); +} + +static struct sctp_ulpevent *sctp_intl_retrieve_partial( + struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sk_buff *first_frag = NULL; + struct sk_buff *last_frag = NULL; + struct sctp_ulpevent *retval; + struct sctp_stream_in *sin; + struct sk_buff *pos; + __u32 next_fsn = 0; + int is_last = 0; + + sin = sctp_stream_in(ulpq->asoc, event->stream); + + skb_queue_walk(&ulpq->reasm, pos) { + struct sctp_ulpevent *cevent = sctp_skb2event(pos); + + if (cevent->stream < event->stream) + continue; + + if (cevent->stream > event->stream || + cevent->mid != sin->mid) + break; + + switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + goto out; + case SCTP_DATA_MIDDLE_FRAG: + if (!first_frag) { + if (cevent->fsn == sin->fsn) { + first_frag = pos; + last_frag = pos; + next_fsn = cevent->fsn + 1; + } + } else if (cevent->fsn == next_fsn) { + last_frag = pos; + next_fsn++; + } else { + goto out; + } + break; + case SCTP_DATA_LAST_FRAG: + if (!first_frag) { + if (cevent->fsn == sin->fsn) { + first_frag = pos; + last_frag = pos; + next_fsn = 0; + is_last = 1; + } + } else if (cevent->fsn == next_fsn) { + last_frag = pos; + next_fsn = 0; + is_last = 1; + } + goto out; + default: + goto out; + } + } + +out: + if (!first_frag) + return NULL; + + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, + last_frag); + if (retval) { + sin->fsn = next_fsn; + if (is_last) { + retval->msg_flags |= MSG_EOR; + sin->pd_mode = 0; + } + } + + return retval; +} + +static struct sctp_ulpevent *sctp_intl_retrieve_reassembled( + struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sctp_association *asoc = ulpq->asoc; + struct sk_buff *pos, *first_frag = NULL; + struct sctp_ulpevent *retval = NULL; + struct sk_buff *pd_first = NULL; + struct sk_buff *pd_last = NULL; + struct sctp_stream_in *sin; + __u32 next_fsn = 0; + __u32 pd_point = 0; + __u32 pd_len = 0; + __u32 mid = 0; + + sin = sctp_stream_in(ulpq->asoc, event->stream); + + skb_queue_walk(&ulpq->reasm, pos) { + struct sctp_ulpevent *cevent = sctp_skb2event(pos); + + if (cevent->stream < event->stream) + continue; + if (cevent->stream > event->stream) + break; + + if (MID_lt(cevent->mid, event->mid)) + continue; + if (MID_lt(event->mid, cevent->mid)) + break; + + switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + if (cevent->mid == sin->mid) { + pd_first = pos; + pd_last = pos; + pd_len = pos->len; + } + + first_frag = pos; + next_fsn = 0; + mid = cevent->mid; + break; + + case SCTP_DATA_MIDDLE_FRAG: + if (first_frag && cevent->mid == mid && + cevent->fsn == next_fsn) { + next_fsn++; + if (pd_first) { + pd_last = pos; + pd_len += pos->len; + } + } else { + first_frag = NULL; + } + break; + + case SCTP_DATA_LAST_FRAG: + if (first_frag && cevent->mid == mid && + cevent->fsn == next_fsn) + goto found; + else + first_frag = NULL; + break; + } + } + + if (!pd_first) + goto out; + + pd_point = sctp_sk(asoc->base.sk)->pd_point; + if (pd_point && pd_point <= pd_len) { + retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + &ulpq->reasm, + pd_first, pd_last); + if (retval) { + sin->fsn = next_fsn; + sin->pd_mode = 1; + } + } + goto out; + +found: + retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + &ulpq->reasm, + first_frag, pos); + if (retval) + retval->msg_flags |= MSG_EOR; + +out: + return retval; +} + +static struct sctp_ulpevent *sctp_intl_reasm(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sctp_ulpevent *retval = NULL; + struct sctp_stream_in *sin; + + if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) { + event->msg_flags |= MSG_EOR; + return event; + } + + sctp_intl_store_reasm(ulpq, event); + + sin = sctp_stream_in(ulpq->asoc, event->stream); + if (sin->pd_mode && event->mid == sin->mid && + event->fsn == sin->fsn) + retval = sctp_intl_retrieve_partial(ulpq, event); + + if (!retval) + retval = sctp_intl_retrieve_reassembled(ulpq, event); + + return retval; +} + +static void sctp_intl_store_ordered(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sctp_ulpevent *cevent; + struct sk_buff *pos; + + pos = skb_peek_tail(&ulpq->lobby); + if (!pos) { + __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); + return; + } + + cevent = (struct sctp_ulpevent *)pos->cb; + if (event->stream == cevent->stream && + MID_lt(cevent->mid, event->mid)) { + __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); + return; + } + + if (event->stream > cevent->stream) { + __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); + return; + } + + skb_queue_walk(&ulpq->lobby, pos) { + cevent = (struct sctp_ulpevent *)pos->cb; + + if (cevent->stream > event->stream) + break; + + if (cevent->stream == event->stream && + MID_lt(event->mid, cevent->mid)) + break; + } + + __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event)); +} + +static void sctp_intl_retrieve_ordered(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sk_buff_head *event_list; + struct sctp_stream *stream; + struct sk_buff *pos, *tmp; + __u16 sid = event->stream; + + stream = &ulpq->asoc->stream; + event_list = (struct sk_buff_head *)sctp_event2skb(event)->prev; + + sctp_skb_for_each(pos, &ulpq->lobby, tmp) { + struct sctp_ulpevent *cevent = (struct sctp_ulpevent *)pos->cb; + + if (cevent->stream > sid) + break; + + if (cevent->stream < sid) + continue; + + if (cevent->mid != sctp_mid_peek(stream, in, sid)) + break; + + sctp_mid_next(stream, in, sid); + + __skb_unlink(pos, &ulpq->lobby); + + __skb_queue_tail(event_list, pos); + } +} + +static struct sctp_ulpevent *sctp_intl_order(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sctp_stream *stream; + __u16 sid; + + stream = &ulpq->asoc->stream; + sid = event->stream; + + if (event->mid != sctp_mid_peek(stream, in, sid)) { + sctp_intl_store_ordered(ulpq, event); + return NULL; + } + + sctp_mid_next(stream, in, sid); + + sctp_intl_retrieve_ordered(ulpq, event); + + return event; +} + +static int sctp_enqueue_event(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sk_buff *skb = sctp_event2skb(event); + struct sock *sk = ulpq->asoc->base.sk; + struct sctp_sock *sp = sctp_sk(sk); + struct sk_buff_head *skb_list; + + skb_list = (struct sk_buff_head *)skb->prev; + + if (sk->sk_shutdown & RCV_SHUTDOWN && + (sk->sk_shutdown & SEND_SHUTDOWN || + !sctp_ulpevent_is_notification(event))) + goto out_free; + + if (!sctp_ulpevent_is_notification(event)) { + sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); + } + + if (!sctp_ulpevent_is_enabled(event, &sp->subscribe)) + goto out_free; + + if (skb_list) + skb_queue_splice_tail_init(skb_list, + &sk->sk_receive_queue); + else + __skb_queue_tail(&sk->sk_receive_queue, skb); + + if (!sp->data_ready_signalled) { + sp->data_ready_signalled = 1; + sk->sk_data_ready(sk); + } + + return 1; + +out_free: + if (skb_list) + sctp_queue_purge_ulpevents(skb_list); + else + sctp_ulpevent_free(event); + + return 0; +} + +static void sctp_intl_store_reasm_uo(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sctp_ulpevent *cevent; + struct sk_buff *pos; + + pos = skb_peek_tail(&ulpq->reasm_uo); + if (!pos) { + __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event)); + return; + } + + cevent = sctp_skb2event(pos); + + if (event->stream == cevent->stream && + event->mid == cevent->mid && + (cevent->msg_flags & SCTP_DATA_FIRST_FRAG || + (!(event->msg_flags & SCTP_DATA_FIRST_FRAG) && + event->fsn > cevent->fsn))) { + __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event)); + return; + } + + if ((event->stream == cevent->stream && + MID_lt(cevent->mid, event->mid)) || + event->stream > cevent->stream) { + __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event)); + return; + } + + skb_queue_walk(&ulpq->reasm_uo, pos) { + cevent = sctp_skb2event(pos); + + if (event->stream < cevent->stream || + (event->stream == cevent->stream && + MID_lt(event->mid, cevent->mid))) + break; + + if (event->stream == cevent->stream && + event->mid == cevent->mid && + !(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) && + (event->msg_flags & SCTP_DATA_FIRST_FRAG || + event->fsn < cevent->fsn)) + break; + } + + __skb_queue_before(&ulpq->reasm_uo, pos, sctp_event2skb(event)); +} + +static struct sctp_ulpevent *sctp_intl_retrieve_partial_uo( + struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sk_buff *first_frag = NULL; + struct sk_buff *last_frag = NULL; + struct sctp_ulpevent *retval; + struct sctp_stream_in *sin; + struct sk_buff *pos; + __u32 next_fsn = 0; + int is_last = 0; + + sin = sctp_stream_in(ulpq->asoc, event->stream); + + skb_queue_walk(&ulpq->reasm_uo, pos) { + struct sctp_ulpevent *cevent = sctp_skb2event(pos); + + if (cevent->stream < event->stream) + continue; + if (cevent->stream > event->stream) + break; + + if (MID_lt(cevent->mid, sin->mid_uo)) + continue; + if (MID_lt(sin->mid_uo, cevent->mid)) + break; + + switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + goto out; + case SCTP_DATA_MIDDLE_FRAG: + if (!first_frag) { + if (cevent->fsn == sin->fsn_uo) { + first_frag = pos; + last_frag = pos; + next_fsn = cevent->fsn + 1; + } + } else if (cevent->fsn == next_fsn) { + last_frag = pos; + next_fsn++; + } else { + goto out; + } + break; + case SCTP_DATA_LAST_FRAG: + if (!first_frag) { + if (cevent->fsn == sin->fsn_uo) { + first_frag = pos; + last_frag = pos; + next_fsn = 0; + is_last = 1; + } + } else if (cevent->fsn == next_fsn) { + last_frag = pos; + next_fsn = 0; + is_last = 1; + } + goto out; + default: + goto out; + } + } + +out: + if (!first_frag) + return NULL; + + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm_uo, first_frag, + last_frag); + if (retval) { + sin->fsn_uo = next_fsn; + if (is_last) { + retval->msg_flags |= MSG_EOR; + sin->pd_mode_uo = 0; + } + } + + return retval; +} + +static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo( + struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sctp_association *asoc = ulpq->asoc; + struct sk_buff *pos, *first_frag = NULL; + struct sctp_ulpevent *retval = NULL; + struct sk_buff *pd_first = NULL; + struct sk_buff *pd_last = NULL; + struct sctp_stream_in *sin; + __u32 next_fsn = 0; + __u32 pd_point = 0; + __u32 pd_len = 0; + __u32 mid = 0; + + sin = sctp_stream_in(ulpq->asoc, event->stream); + + skb_queue_walk(&ulpq->reasm_uo, pos) { + struct sctp_ulpevent *cevent = sctp_skb2event(pos); + + if (cevent->stream < event->stream) + continue; + if (cevent->stream > event->stream) + break; + + if (MID_lt(cevent->mid, event->mid)) + continue; + if (MID_lt(event->mid, cevent->mid)) + break; + + switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + if (!sin->pd_mode_uo) { + sin->mid_uo = cevent->mid; + pd_first = pos; + pd_last = pos; + pd_len = pos->len; + } + + first_frag = pos; + next_fsn = 0; + mid = cevent->mid; + break; + + case SCTP_DATA_MIDDLE_FRAG: + if (first_frag && cevent->mid == mid && + cevent->fsn == next_fsn) { + next_fsn++; + if (pd_first) { + pd_last = pos; + pd_len += pos->len; + } + } else { + first_frag = NULL; + } + break; + + case SCTP_DATA_LAST_FRAG: + if (first_frag && cevent->mid == mid && + cevent->fsn == next_fsn) + goto found; + else + first_frag = NULL; + break; + } + } + + if (!pd_first) + goto out; + + pd_point = sctp_sk(asoc->base.sk)->pd_point; + if (pd_point && pd_point <= pd_len) { + retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + &ulpq->reasm_uo, + pd_first, pd_last); + if (retval) { + sin->fsn_uo = next_fsn; + sin->pd_mode_uo = 1; + } + } + goto out; + +found: + retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + &ulpq->reasm_uo, + first_frag, pos); + if (retval) + retval->msg_flags |= MSG_EOR; + +out: + return retval; +} + +static struct sctp_ulpevent *sctp_intl_reasm_uo(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sctp_ulpevent *retval = NULL; + struct sctp_stream_in *sin; + + if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) { + event->msg_flags |= MSG_EOR; + return event; + } + + sctp_intl_store_reasm_uo(ulpq, event); + + sin = sctp_stream_in(ulpq->asoc, event->stream); + if (sin->pd_mode_uo && event->mid == sin->mid_uo && + event->fsn == sin->fsn_uo) + retval = sctp_intl_retrieve_partial_uo(ulpq, event); + + if (!retval) + retval = sctp_intl_retrieve_reassembled_uo(ulpq, event); + + return retval; +} + +static struct sctp_ulpevent *sctp_intl_retrieve_first_uo(struct sctp_ulpq *ulpq) +{ + struct sctp_stream_in *csin, *sin = NULL; + struct sk_buff *first_frag = NULL; + struct sk_buff *last_frag = NULL; + struct sctp_ulpevent *retval; + struct sk_buff *pos; + __u32 next_fsn = 0; + __u16 sid = 0; + + skb_queue_walk(&ulpq->reasm_uo, pos) { + struct sctp_ulpevent *cevent = sctp_skb2event(pos); + + csin = sctp_stream_in(ulpq->asoc, cevent->stream); + if (csin->pd_mode_uo) + continue; + + switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + if (first_frag) + goto out; + first_frag = pos; + last_frag = pos; + next_fsn = 0; + sin = csin; + sid = cevent->stream; + sin->mid_uo = cevent->mid; + break; + case SCTP_DATA_MIDDLE_FRAG: + if (!first_frag) + break; + if (cevent->stream == sid && + cevent->mid == sin->mid_uo && + cevent->fsn == next_fsn) { + next_fsn++; + last_frag = pos; + } else { + goto out; + } + break; + case SCTP_DATA_LAST_FRAG: + if (first_frag) + goto out; + break; + default: + break; + } + } + + if (!first_frag) + return NULL; + +out: + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm_uo, first_frag, + last_frag); + if (retval) { + sin->fsn_uo = next_fsn; + sin->pd_mode_uo = 1; + } + + return retval; +} + +static int sctp_ulpevent_idata(struct sctp_ulpq *ulpq, + struct sctp_chunk *chunk, gfp_t gfp) +{ + struct sctp_ulpevent *event; + struct sk_buff_head temp; + int event_eor = 0; + + event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); + if (!event) + return -ENOMEM; + + event->mid = ntohl(chunk->subh.idata_hdr->mid); + if (event->msg_flags & SCTP_DATA_FIRST_FRAG) + event->ppid = chunk->subh.idata_hdr->ppid; + else + event->fsn = ntohl(chunk->subh.idata_hdr->fsn); + + if (!(event->msg_flags & SCTP_DATA_UNORDERED)) { + event = sctp_intl_reasm(ulpq, event); + if (event && event->msg_flags & MSG_EOR) { + skb_queue_head_init(&temp); + __skb_queue_tail(&temp, sctp_event2skb(event)); + + event = sctp_intl_order(ulpq, event); + } + } else { + event = sctp_intl_reasm_uo(ulpq, event); + } + + if (event) { + event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0; + sctp_enqueue_event(ulpq, event); + } + + return event_eor; +} + +static struct sctp_ulpevent *sctp_intl_retrieve_first(struct sctp_ulpq *ulpq) +{ + struct sctp_stream_in *csin, *sin = NULL; + struct sk_buff *first_frag = NULL; + struct sk_buff *last_frag = NULL; + struct sctp_ulpevent *retval; + struct sk_buff *pos; + __u32 next_fsn = 0; + __u16 sid = 0; + + skb_queue_walk(&ulpq->reasm, pos) { + struct sctp_ulpevent *cevent = sctp_skb2event(pos); + + csin = sctp_stream_in(ulpq->asoc, cevent->stream); + if (csin->pd_mode) + continue; + + switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + if (first_frag) + goto out; + if (cevent->mid == csin->mid) { + first_frag = pos; + last_frag = pos; + next_fsn = 0; + sin = csin; + sid = cevent->stream; + } + break; + case SCTP_DATA_MIDDLE_FRAG: + if (!first_frag) + break; + if (cevent->stream == sid && + cevent->mid == sin->mid && + cevent->fsn == next_fsn) { + next_fsn++; + last_frag = pos; + } else { + goto out; + } + break; + case SCTP_DATA_LAST_FRAG: + if (first_frag) + goto out; + break; + default: + break; + } + } + + if (!first_frag) + return NULL; + +out: + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, + last_frag); + if (retval) { + sin->fsn = next_fsn; + sin->pd_mode = 1; + } + + return retval; +} + +static void sctp_intl_start_pd(struct sctp_ulpq *ulpq, gfp_t gfp) +{ + struct sctp_ulpevent *event; + + if (!skb_queue_empty(&ulpq->reasm)) { + do { + event = sctp_intl_retrieve_first(ulpq); + if (event) + sctp_enqueue_event(ulpq, event); + } while (event); + } + + if (!skb_queue_empty(&ulpq->reasm_uo)) { + do { + event = sctp_intl_retrieve_first_uo(ulpq); + if (event) + sctp_enqueue_event(ulpq, event); + } while (event); + } +} + +static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, + gfp_t gfp) +{ + struct sctp_association *asoc = ulpq->asoc; + __u32 freed = 0; + __u16 needed; + + if (chunk) { + needed = ntohs(chunk->chunk_hdr->length); + needed -= sizeof(struct sctp_idata_chunk); + } else { + needed = SCTP_DEFAULT_MAXWINDOW; + } + + if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { + freed = sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed); + if (freed < needed) + freed += sctp_ulpq_renege_list(ulpq, &ulpq->reasm, + needed); + if (freed < needed) + freed += sctp_ulpq_renege_list(ulpq, &ulpq->reasm_uo, + needed); + } + + if (chunk && freed >= needed) + if (sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0) + sctp_intl_start_pd(ulpq, gfp); + + sk_mem_reclaim(asoc->base.sk); +} + +static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid, + __u32 mid, __u16 flags, gfp_t gfp) +{ + struct sock *sk = ulpq->asoc->base.sk; + struct sctp_ulpevent *ev = NULL; + + if (!sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT, + &sctp_sk(sk)->subscribe)) + return; + + ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED, + sid, mid, flags, gfp); + if (ev) { + __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); + + if (!sctp_sk(sk)->data_ready_signalled) { + sctp_sk(sk)->data_ready_signalled = 1; + sk->sk_data_ready(sk); + } + } +} + +static void sctp_intl_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) +{ + struct sctp_stream *stream = &ulpq->asoc->stream; + struct sctp_ulpevent *cevent, *event = NULL; + struct sk_buff_head *lobby = &ulpq->lobby; + struct sk_buff *pos, *tmp; + struct sk_buff_head temp; + __u16 csid; + __u32 cmid; + + skb_queue_head_init(&temp); + sctp_skb_for_each(pos, lobby, tmp) { + cevent = (struct sctp_ulpevent *)pos->cb; + csid = cevent->stream; + cmid = cevent->mid; + + if (csid > sid) + break; + + if (csid < sid) + continue; + + if (!MID_lt(cmid, sctp_mid_peek(stream, in, csid))) + break; + + __skb_unlink(pos, lobby); + if (!event) + event = sctp_skb2event(pos); + + __skb_queue_tail(&temp, pos); + } + + if (!event && pos != (struct sk_buff *)lobby) { + cevent = (struct sctp_ulpevent *)pos->cb; + csid = cevent->stream; + cmid = cevent->mid; + + if (csid == sid && cmid == sctp_mid_peek(stream, in, csid)) { + sctp_mid_next(stream, in, csid); + __skb_unlink(pos, lobby); + __skb_queue_tail(&temp, pos); + event = sctp_skb2event(pos); + } + } + + if (event) { + sctp_intl_retrieve_ordered(ulpq, event); + sctp_enqueue_event(ulpq, event); + } +} + +static void sctp_intl_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) +{ + struct sctp_stream *stream = &ulpq->asoc->stream; + __u16 sid; + + for (sid = 0; sid < stream->incnt; sid++) { + struct sctp_stream_in *sin = &stream->in[sid]; + __u32 mid; + + if (sin->pd_mode_uo) { + sin->pd_mode_uo = 0; + + mid = sin->mid_uo; + sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x1, gfp); + } + + if (sin->pd_mode) { + sin->pd_mode = 0; + + mid = sin->mid; + sctp_intl_stream_abort_pd(ulpq, sid, mid, 0, gfp); + sctp_mid_skip(stream, in, sid, mid); + + sctp_intl_reap_ordered(ulpq, sid); + } + } + + /* intl abort pd happens only when all data needs to be cleaned */ + sctp_ulpq_flush(ulpq); +} + +static inline int sctp_get_skip_pos(struct sctp_ifwdtsn_skip *skiplist, + int nskips, __be16 stream, __u8 flags) +{ + int i; + + for (i = 0; i < nskips; i++) + if (skiplist[i].stream == stream && + skiplist[i].flags == flags) + return i; + + return i; +} + +#define SCTP_FTSN_U_BIT 0x1 +static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn) +{ + struct sctp_ifwdtsn_skip ftsn_skip_arr[10]; + struct sctp_association *asoc = q->asoc; + struct sctp_chunk *ftsn_chunk = NULL; + struct list_head *lchunk, *temp; + int nskips = 0, skip_pos; + struct sctp_chunk *chunk; + __u32 tsn; + + if (!asoc->peer.prsctp_capable) + return; + + if (TSN_lt(asoc->adv_peer_ack_point, ctsn)) + asoc->adv_peer_ack_point = ctsn; + + list_for_each_safe(lchunk, temp, &q->abandoned) { + chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list); + tsn = ntohl(chunk->subh.data_hdr->tsn); + + if (TSN_lte(tsn, ctsn)) { + list_del_init(lchunk); + sctp_chunk_free(chunk); + } else if (TSN_lte(tsn, asoc->adv_peer_ack_point + 1)) { + __be16 sid = chunk->subh.idata_hdr->stream; + __be32 mid = chunk->subh.idata_hdr->mid; + __u8 flags = 0; + + if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) + flags |= SCTP_FTSN_U_BIT; + + asoc->adv_peer_ack_point = tsn; + skip_pos = sctp_get_skip_pos(&ftsn_skip_arr[0], nskips, + sid, flags); + ftsn_skip_arr[skip_pos].stream = sid; + ftsn_skip_arr[skip_pos].reserved = 0; + ftsn_skip_arr[skip_pos].flags = flags; + ftsn_skip_arr[skip_pos].mid = mid; + if (skip_pos == nskips) + nskips++; + if (nskips == 10) + break; + } else { + break; + } + } + + if (asoc->adv_peer_ack_point > ctsn) + ftsn_chunk = sctp_make_ifwdtsn(asoc, asoc->adv_peer_ack_point, + nskips, &ftsn_skip_arr[0]); + + if (ftsn_chunk) { + list_add_tail(&ftsn_chunk->list, &q->control_chunk_list); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS); + } +} + +#define _sctp_walk_ifwdtsn(pos, chunk, end) \ + for (pos = chunk->subh.ifwdtsn_hdr->skip; \ + (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++) + +#define sctp_walk_ifwdtsn(pos, ch) \ + _sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \ + sizeof(struct sctp_ifwdtsn_chunk)) + +static bool sctp_validate_fwdtsn(struct sctp_chunk *chunk) +{ + struct sctp_fwdtsn_skip *skip; + __u16 incnt; + + if (chunk->chunk_hdr->type != SCTP_CID_FWD_TSN) + return false; + + incnt = chunk->asoc->stream.incnt; + sctp_walk_fwdtsn(skip, chunk) + if (ntohs(skip->stream) >= incnt) + return false; + + return true; +} + +static bool sctp_validate_iftsn(struct sctp_chunk *chunk) +{ + struct sctp_ifwdtsn_skip *skip; + __u16 incnt; + + if (chunk->chunk_hdr->type != SCTP_CID_I_FWD_TSN) + return false; + + incnt = chunk->asoc->stream.incnt; + sctp_walk_ifwdtsn(skip, chunk) + if (ntohs(skip->stream) >= incnt) + return false; + + return true; +} + +static void sctp_report_fwdtsn(struct sctp_ulpq *ulpq, __u32 ftsn) +{ + /* Move the Cumulattive TSN Ack ahead. */ + sctp_tsnmap_skip(&ulpq->asoc->peer.tsn_map, ftsn); + /* purge the fragmentation queue */ + sctp_ulpq_reasm_flushtsn(ulpq, ftsn); + /* Abort any in progress partial delivery. */ + sctp_ulpq_abort_pd(ulpq, GFP_ATOMIC); +} + +static void sctp_intl_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 ftsn) +{ + struct sk_buff *pos, *tmp; + + skb_queue_walk_safe(&ulpq->reasm, pos, tmp) { + struct sctp_ulpevent *event = sctp_skb2event(pos); + __u32 tsn = event->tsn; + + if (TSN_lte(tsn, ftsn)) { + __skb_unlink(pos, &ulpq->reasm); + sctp_ulpevent_free(event); + } + } + + skb_queue_walk_safe(&ulpq->reasm_uo, pos, tmp) { + struct sctp_ulpevent *event = sctp_skb2event(pos); + __u32 tsn = event->tsn; + + if (TSN_lte(tsn, ftsn)) { + __skb_unlink(pos, &ulpq->reasm_uo); + sctp_ulpevent_free(event); + } + } +} + +static void sctp_report_iftsn(struct sctp_ulpq *ulpq, __u32 ftsn) +{ + /* Move the Cumulattive TSN Ack ahead. */ + sctp_tsnmap_skip(&ulpq->asoc->peer.tsn_map, ftsn); + /* purge the fragmentation queue */ + sctp_intl_reasm_flushtsn(ulpq, ftsn); + /* abort only when it's for all data */ + if (ftsn == sctp_tsnmap_get_max_tsn_seen(&ulpq->asoc->peer.tsn_map)) + sctp_intl_abort_pd(ulpq, GFP_ATOMIC); +} + +static void sctp_handle_fwdtsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk) +{ + struct sctp_fwdtsn_skip *skip; + + /* Walk through all the skipped SSNs */ + sctp_walk_fwdtsn(skip, chunk) + sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn)); +} + +static void sctp_intl_skip(struct sctp_ulpq *ulpq, __u16 sid, __u32 mid, + __u8 flags) +{ + struct sctp_stream_in *sin = sctp_stream_in(ulpq->asoc, sid); + struct sctp_stream *stream = &ulpq->asoc->stream; + + if (flags & SCTP_FTSN_U_BIT) { + if (sin->pd_mode_uo && MID_lt(sin->mid_uo, mid)) { + sin->pd_mode_uo = 0; + sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x1, + GFP_ATOMIC); + } + return; + } + + if (MID_lt(mid, sctp_mid_peek(stream, in, sid))) + return; + + if (sin->pd_mode) { + sin->pd_mode = 0; + sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x0, GFP_ATOMIC); + } + + sctp_mid_skip(stream, in, sid, mid); + + sctp_intl_reap_ordered(ulpq, sid); +} + +static void sctp_handle_iftsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk) +{ + struct sctp_ifwdtsn_skip *skip; + + /* Walk through all the skipped MIDs and abort stream pd if possible */ + sctp_walk_ifwdtsn(skip, chunk) + sctp_intl_skip(ulpq, ntohs(skip->stream), + ntohl(skip->mid), skip->flags); +} + +static struct sctp_stream_interleave sctp_stream_interleave_0 = { + .data_chunk_len = sizeof(struct sctp_data_chunk), + .ftsn_chunk_len = sizeof(struct sctp_fwdtsn_chunk), + /* DATA process functions */ + .make_datafrag = sctp_make_datafrag_empty, + .assign_number = sctp_chunk_assign_ssn, + .validate_data = sctp_validate_data, + .ulpevent_data = sctp_ulpq_tail_data, + .enqueue_event = sctp_ulpq_tail_event, + .renege_events = sctp_ulpq_renege, + .start_pd = sctp_ulpq_partial_delivery, + .abort_pd = sctp_ulpq_abort_pd, + /* FORWARD-TSN process functions */ + .generate_ftsn = sctp_generate_fwdtsn, + .validate_ftsn = sctp_validate_fwdtsn, + .report_ftsn = sctp_report_fwdtsn, + .handle_ftsn = sctp_handle_fwdtsn, +}; + +static struct sctp_stream_interleave sctp_stream_interleave_1 = { + .data_chunk_len = sizeof(struct sctp_idata_chunk), + .ftsn_chunk_len = sizeof(struct sctp_ifwdtsn_chunk), + /* I-DATA process functions */ + .make_datafrag = sctp_make_idatafrag_empty, + .assign_number = sctp_chunk_assign_mid, + .validate_data = sctp_validate_idata, + .ulpevent_data = sctp_ulpevent_idata, + .enqueue_event = sctp_enqueue_event, + .renege_events = sctp_renege_events, + .start_pd = sctp_intl_start_pd, + .abort_pd = sctp_intl_abort_pd, + /* I-FORWARD-TSN process functions */ + .generate_ftsn = sctp_generate_iftsn, + .validate_ftsn = sctp_validate_iftsn, + .report_ftsn = sctp_report_iftsn, + .handle_ftsn = sctp_handle_iftsn, +}; + +void sctp_stream_interleave_init(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + + asoc = container_of(stream, struct sctp_association, stream); + stream->si = asoc->intl_enable ? &sctp_stream_interleave_1 + : &sctp_stream_interleave_0; +} diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index d8c162a4089c..f5fcd425232a 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -242,7 +242,8 @@ int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch) { - if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) { + if (!list_is_last(&ch->frag_list, &ch->msg->chunks) && + !q->asoc->intl_enable) { struct sctp_stream_out *sout; __u16 sid; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ef7ca44d6e6a..33ca5b73cdb3 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -289,6 +289,13 @@ static struct ctl_table sctp_net_table[] = { .proc_handler = proc_sctp_do_auth, }, { + .procname = "intl_enable", + .data = &init_net.sctp.intl_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "addr_scope_policy", .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 5447228bf1a0..84207ad33e8e 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -443,8 +443,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( goto fail; /* Pull off the common chunk header and DATA header. */ - skb_pull(skb, sizeof(struct sctp_data_chunk)); - len -= sizeof(struct sctp_data_chunk); + skb_pull(skb, sctp_datachk_len(&asoc->stream)); + len -= sctp_datachk_len(&asoc->stream); /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); @@ -705,8 +705,6 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, sctp_ulpevent_receive_data(event, asoc); event->stream = ntohs(chunk->subh.data_hdr->stream); - event->ssn = ntohs(chunk->subh.data_hdr->ssn); - event->ppid = chunk->subh.data_hdr->ppid; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) { event->flags |= SCTP_UNORDERED; event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); @@ -732,8 +730,9 @@ fail: * various events. */ struct sctp_ulpevent *sctp_ulpevent_make_pdapi( - const struct sctp_association *asoc, __u32 indication, - gfp_t gfp) + const struct sctp_association *asoc, + __u32 indication, __u32 sid, __u32 seq, + __u32 flags, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_pdapi_event *pd; @@ -754,7 +753,9 @@ struct sctp_ulpevent *sctp_ulpevent_make_pdapi( * Currently unused. */ pd->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT; - pd->pdapi_flags = 0; + pd->pdapi_flags = flags; + pd->pdapi_stream = sid; + pd->pdapi_seq = seq; /* pdapi_length: 32 bits (unsigned integer) * diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index e36ec5dd64c6..0b427100b0d4 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -60,6 +60,7 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq, ulpq->asoc = asoc; skb_queue_head_init(&ulpq->reasm); + skb_queue_head_init(&ulpq->reasm_uo); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; @@ -83,6 +84,10 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq) sctp_ulpevent_free(event); } + while ((skb = __skb_dequeue(&ulpq->reasm_uo)) != NULL) { + event = sctp_skb2event(skb); + sctp_ulpevent_free(event); + } } /* Dispose of a ulpqueue. */ @@ -104,6 +109,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, if (!event) return -ENOMEM; + event->ssn = ntohs(chunk->subh.data_hdr->ssn); + event->ppid = chunk->subh.data_hdr->ppid; + /* Do reassembly if needed. */ event = sctp_ulpq_reasm(ulpq, event); @@ -328,9 +336,10 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ -static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, - struct sk_buff_head *queue, struct sk_buff *f_frag, - struct sk_buff *l_frag) +struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, + struct sk_buff_head *queue, + struct sk_buff *f_frag, + struct sk_buff *l_frag) { struct sk_buff *pos; struct sk_buff *new = NULL; @@ -853,7 +862,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, struct sctp_stream *stream; /* Check if this message needs ordering. */ - if (SCTP_DATA_UNORDERED & event->msg_flags) + if (event->msg_flags & SCTP_DATA_UNORDERED) return event; /* Note: The stream ID must be verified before this routine. */ @@ -974,8 +983,8 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) sctp_ulpq_reap_ordered(ulpq, sid); } -static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, - struct sk_buff_head *list, __u16 needed) +__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list, + __u16 needed) { __u16 freed = 0; __u32 tsn, last_tsn; @@ -1132,7 +1141,7 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) &sctp_sk(sk)->subscribe)) ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED, - gfp); + 0, 0, 0, gfp); if (ev) __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 449f62e1e270..3583c8ab1bae 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -115,7 +115,6 @@ static int smc_release(struct socket *sock) goto out; smc = smc_sk(sk); - sock_hold(sk); if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires * sock lock for child sockets again @@ -124,10 +123,7 @@ static int smc_release(struct socket *sock) else lock_sock(sk); - if (smc->use_fallback) { - sk->sk_state = SMC_CLOSED; - sk->sk_state_change(sk); - } else { + if (!smc->use_fallback) { rc = smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; @@ -136,20 +132,21 @@ static int smc_release(struct socket *sock) sock_release(smc->clcsock); smc->clcsock = NULL; } + if (smc->use_fallback) { + sock_put(sk); /* passive closing */ + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); + } /* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (smc->use_fallback) { - schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else if (sk->sk_state == SMC_CLOSED) { + if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); - } release_sock(sk); - sock_put(sk); + sk->sk_prot->unhash(sk); + sock_put(sk); /* final sock_put */ out: return rc; } @@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_LIST_HEAD(&smc->accept_q); spin_lock_init(&smc->accept_q_lock); - INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work); sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk); @@ -377,6 +373,15 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } +static void smc_lgr_forget(struct smc_link_group *lgr) +{ + spin_lock_bh(&smc_lgr_list.lock); + /* do not use this link group for new connections */ + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); + spin_unlock_bh(&smc_lgr_list.lock); +} + /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc) { @@ -390,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc) int rc = 0; u8 ibport; + sock_hold(&smc->sk); /* sock put in passive closing */ + if (!tcp_sk(smc->clcsock->sk)->syn_smc) { /* peer has not signalled SMC-capability */ smc->use_fallback = true; @@ -513,6 +520,8 @@ out_connected: return rc ? rc : local_contact; decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); decline_rdma: @@ -520,15 +529,19 @@ decline_rdma: smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { rc = smc_clc_send_decline(smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (rc < 0) goto out_err; } goto out_connected; out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); out_err: + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ return rc; } @@ -581,40 +594,33 @@ out_err: static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) { - struct sock *sk = &lsmc->sk; - struct socket *new_clcsock; + struct socket *new_clcsock = NULL; + struct sock *lsk = &lsmc->sk; struct sock *new_sk; int rc; - release_sock(&lsmc->sk); - new_sk = smc_sock_alloc(sock_net(sk), NULL); + release_sock(lsk); + new_sk = smc_sock_alloc(sock_net(lsk), NULL); if (!new_sk) { rc = -ENOMEM; - lsmc->sk.sk_err = ENOMEM; + lsk->sk_err = ENOMEM; *new_smc = NULL; - lock_sock(&lsmc->sk); + lock_sock(lsk); goto out; } *new_smc = smc_sk(new_sk); rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); - lock_sock(&lsmc->sk); - if (rc < 0) { - lsmc->sk.sk_err = -rc; - new_sk->sk_state = SMC_CLOSED; - sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); - sock_put(new_sk); - *new_smc = NULL; - goto out; - } - if (lsmc->sk.sk_state == SMC_CLOSED) { + lock_sock(lsk); + if (rc < 0) + lsk->sk_err = -rc; + if (rc < 0 || lsk->sk_state == SMC_CLOSED) { if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); - sock_put(new_sk); + new_sk->sk_prot->unhash(new_sk); + sock_put(new_sk); /* final */ *new_smc = NULL; goto out; } @@ -631,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk) { struct smc_sock *par = smc_sk(parent); - sock_hold(sk); + sock_hold(sk); /* sock_put in smc_accept_unlink () */ spin_lock(&par->accept_q_lock); list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); spin_unlock(&par->accept_q_lock); @@ -647,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk) list_del_init(&smc_sk(sk)->accept_q); spin_unlock(&par->accept_q_lock); sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); - sock_put(sk); + sock_put(sk); /* sock_hold in smc_accept_enqueue */ } /* remove a sock from the accept queue to bind it to a new socket created @@ -664,8 +670,12 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { + if (isk->clcsock) { + sock_release(isk->clcsock); + isk->clcsock = NULL; + } new_sk->sk_prot->unhash(new_sk); - sock_put(new_sk); + sock_put(new_sk); /* final */ continue; } if (new_sock) @@ -680,14 +690,11 @@ void smc_close_non_accepted(struct sock *sk) { struct smc_sock *smc = smc_sk(sk); - sock_hold(sk); lock_sock(sk); if (!sk->sk_lingertime) /* wait for peer closing */ sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; - if (smc->use_fallback) { - sk->sk_state = SMC_CLOSED; - } else { + if (!smc->use_fallback) { smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; @@ -700,14 +707,15 @@ void smc_close_non_accepted(struct sock *sk) sock_release(tcp); } if (smc->use_fallback) { - schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else if (sk->sk_state == SMC_CLOSED) { - smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); + sock_put(sk); /* passive closing */ + sk->sk_state = SMC_CLOSED; + } else { + if (sk->sk_state == SMC_CLOSED) + smc_conn_free(&smc->conn); } release_sock(sk); - sock_put(sk); + sk->sk_prot->unhash(sk); + sock_put(sk); /* final sock_put */ } static int smc_serv_conf_first_link(struct smc_sock *smc) @@ -751,14 +759,16 @@ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); + struct smc_clc_msg_proposal_prefix *pclc_prfx; struct socket *newclcsock = new_smc->clcsock; struct smc_sock *lsmc = new_smc->listen_smc; struct smc_clc_msg_accept_confirm cclc; int local_contact = SMC_REUSE_CONTACT; struct sock *newsmcsk = &new_smc->sk; - struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_proposal *pclc; struct smc_ib_device *smcibdev; struct sockaddr_in peeraddr; + u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; int rc = 0, len; @@ -775,7 +785,7 @@ static void smc_listen_work(struct work_struct *work) /* do inband token exchange - *wait for and receive SMC Proposal CLC message */ - reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc), + reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf), SMC_CLC_PROPOSAL); if (reason_code < 0) goto out_err; @@ -804,8 +814,11 @@ static void smc_listen_work(struct work_struct *work) reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } - if ((pclc.outgoing_subnet != subnet) || - (pclc.prefix_len != prefix_len)) { + + pclc = (struct smc_clc_msg_proposal *)&buf; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (pclc_prfx->outgoing_subnet != subnet || + pclc_prfx->prefix_len != prefix_len) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } @@ -816,7 +829,7 @@ static void smc_listen_work(struct work_struct *work) /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, - smcibdev, ibport, &pclc.lcl, 0); + smcibdev, ibport, &pclc->lcl, 0); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@ -879,11 +892,9 @@ static void smc_listen_work(struct work_struct *work) } /* QP confirmation over RoCE fabric */ reason_code = smc_serv_conf_first_link(new_smc); - if (reason_code < 0) { + if (reason_code < 0) /* peer is not aware of a problem */ - rc = reason_code; goto out_err_unlock; - } if (reason_code > 0) goto decline_rdma_unlock; } @@ -910,21 +921,26 @@ enqueue: return; decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); decline_rdma: /* RDMA setup failed, switch back to TCP */ smc_conn_free(&new_smc->conn); new_smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(new_smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (smc_clc_send_decline(new_smc, reason_code) < 0) goto out_err; } goto out_connected; out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); out_err: + if (newsmcsk->sk_state == SMC_INIT) + sock_put(&new_smc->sk); /* passive closing */ newsmcsk->sk_state = SMC_CLOSED; smc_conn_free(&new_smc->conn); goto enqueue; /* queue new sock with sk_err set */ @@ -934,11 +950,12 @@ static void smc_tcp_listen_work(struct work_struct *work) { struct smc_sock *lsmc = container_of(work, struct smc_sock, tcp_listen_work); + struct sock *lsk = &lsmc->sk; struct smc_sock *new_smc; int rc = 0; - lock_sock(&lsmc->sk); - while (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock(lsk); + while (lsk->sk_state == SMC_LISTEN) { rc = smc_clcsock_accept(lsmc, &new_smc); if (rc) goto out; @@ -947,15 +964,25 @@ static void smc_tcp_listen_work(struct work_struct *work) new_smc->listen_smc = lsmc; new_smc->use_fallback = false; /* assume rdma capability first*/ - sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */ + sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); - schedule_work(&new_smc->smc_listen_work); + sock_hold(&new_smc->sk); /* sock_put in passive closing */ + if (!schedule_work(&new_smc->smc_listen_work)) + sock_put(&new_smc->sk); } out: - release_sock(&lsmc->sk); - lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */ + if (lsmc->clcsock) { + sock_release(lsmc->clcsock); + lsmc->clcsock = NULL; + } + release_sock(lsk); + /* no more listening, wake up smc_close_wait_listen_clcsock and + * accept + */ + lsk->sk_state_change(lsk); + sock_put(&lsmc->sk); /* sock_hold in smc_listen */ } static int smc_listen(struct socket *sock, int backlog) @@ -989,7 +1016,9 @@ static int smc_listen(struct socket *sock, int backlog) sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN; INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); - schedule_work(&smc->tcp_listen_work); + sock_hold(sk); /* sock_hold in tcp_listen_worker */ + if (!schedule_work(&smc->tcp_listen_work)) + sock_put(sk); out: release_sock(sk); @@ -1006,6 +1035,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, int rc = 0; lsmc = smc_sk(sk); + sock_hold(sk); /* sock_put below */ lock_sock(sk); if (lsmc->sk.sk_state != SMC_LISTEN) { @@ -1040,6 +1070,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, out: release_sock(sk); + sock_put(sk); /* sock_hold above */ return rc; } @@ -1109,21 +1140,15 @@ out: static __poll_t smc_accept_poll(struct sock *parent) { - struct smc_sock *isk; - struct sock *sk; + struct smc_sock *isk = smc_sk(parent); + int mask = 0; - lock_sock(parent); - list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) { - sk = (struct sock *)isk; + spin_lock(&isk->accept_q_lock); + if (!list_empty(&isk->accept_q)) + mask = POLLIN | POLLRDNORM; + spin_unlock(&isk->accept_q_lock); - if (sk->sk_state == SMC_ACTIVE) { - release_sock(parent); - return POLLIN | POLLRDNORM; - } - } - release_sock(parent); - - return 0; + return mask; } static __poll_t smc_poll(struct file *file, struct socket *sock, @@ -1134,9 +1159,15 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, struct smc_sock *smc; int rc; + if (!sk) + return POLLNVAL; + smc = smc_sk(sock->sk); + sock_hold(sk); + lock_sock(sk); if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { /* delegate to CLC child sock */ + release_sock(sk); mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); /* if non-blocking connect finished ... */ lock_sock(sk); @@ -1148,37 +1179,43 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, rc = smc_connect_rdma(smc); if (rc < 0) mask |= POLLERR; - else - /* success cases including fallback */ - mask |= POLLOUT | POLLWRNORM; + /* success cases including fallback */ + mask |= POLLOUT | POLLWRNORM; } } - release_sock(sk); } else { - sock_poll_wait(file, sk_sleep(sk), wait); - if (sk->sk_state == SMC_LISTEN) - /* woken up by sk_data_ready in smc_listen_work() */ - mask |= smc_accept_poll(sk); + if (sk->sk_state != SMC_CLOSED) { + release_sock(sk); + sock_poll_wait(file, sk_sleep(sk), wait); + lock_sock(sk); + } if (sk->sk_err) mask |= POLLERR; - if (atomic_read(&smc->conn.sndbuf_space) || - (sk->sk_shutdown & SEND_SHUTDOWN)) { - mask |= POLLOUT | POLLWRNORM; - } else { - sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - } - if (atomic_read(&smc->conn.bytes_to_rcv)) - mask |= POLLIN | POLLRDNORM; if ((sk->sk_shutdown == SHUTDOWN_MASK) || (sk->sk_state == SMC_CLOSED)) mask |= POLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM | POLLRDHUP; - if (sk->sk_state == SMC_APPCLOSEWAIT1) - mask |= POLLIN; + if (sk->sk_state == SMC_LISTEN) { + /* woken up by sk_data_ready in smc_listen_work() */ + mask = smc_accept_poll(sk); + } else { + if (atomic_read(&smc->conn.sndbuf_space) || + sk->sk_shutdown & SEND_SHUTDOWN) { + mask |= POLLOUT | POLLWRNORM; + } else { + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + } + if (atomic_read(&smc->conn.bytes_to_rcv)) + mask |= POLLIN | POLLRDNORM; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLIN | POLLRDNORM | POLLRDHUP; + if (sk->sk_state == SMC_APPCLOSEWAIT1) + mask |= POLLIN; + } } + release_sock(sk); + sock_put(sk); return mask; } diff --git a/net/smc/smc.h b/net/smc/smc.h index 0bee9d16cf29..9518986c97b1 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -178,7 +178,6 @@ struct smc_sock { /* smc sock container */ struct work_struct smc_listen_work;/* prepare new accept socket */ struct list_head accept_q; /* sockets to be accepted */ spinlock_t accept_q_lock; /* protects accept_q */ - struct delayed_work sock_put_work; /* final socket freeing */ bool use_fallback; /* fallback to tcp */ u8 wait_close_tx_prepared : 1; /* shutdown wr or close @@ -253,12 +252,12 @@ static inline int smc_uncompress_bufsize(u8 compressed) static inline bool using_ipsec(struct smc_sock *smc) { return (smc->clcsock->sk->sk_policy[0] || - smc->clcsock->sk->sk_policy[1]) ? 1 : 0; + smc->clcsock->sk->sk_policy[1]) ? true : false; } #else static inline bool using_ipsec(struct smc_sock *smc) { - return 0; + return false; } #endif diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 87f7bede6eab..3cd086e5bd28 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, cdcpend->conn); } smc_tx_sndbuf_nonfull(smc); - if (smc->sk.sk_state != SMC_ACTIVE) - /* wake up smc_close_wait_tx_pends() */ - smc->sk.sk_state_change(&smc->sk); bh_unlock_sock(&smc->sk); } @@ -68,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_cdc_tx_pend **pend) { struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + int rc; - return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, - (struct smc_wr_tx_pend_priv **)pend); + rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, + (struct smc_wr_tx_pend_priv **)pend); + if (!conn->alert_token_local) + /* abnormal termination */ + rc = -EPIPE; + return rc; } static inline void smc_cdc_add_pending_send(struct smc_connection *conn, @@ -155,14 +157,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) (unsigned long)conn); } -bool smc_cdc_tx_has_pending(struct smc_connection *conn) -{ - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; - - return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, (unsigned long)conn); -} - /********************************* receive ***********************************/ static inline bool smc_cdc_before(u16 seq1, u16 seq2) @@ -213,6 +207,17 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* guarantee 0 <= bytes_to_rcv <= rmbe_size */ smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); + } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) || + (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) { + smc->sk.sk_data_ready(&smc->sk); + } + + /* piggy backed tx info */ + /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ + if (diff_cons && smc_tx_prepared_sends(conn)) { + smc_tx_sndbuf_nonempty(conn); + /* trigger socket release if connection closed */ + smc_close_wake_tx_prepared(smc); } if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { @@ -224,25 +229,10 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, if (smc->clcsock && smc->clcsock->sk) smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(&smc->sk, SOCK_DONE); - schedule_work(&conn->close_work); + sock_hold(&smc->sk); /* sock_put in close_work */ + if (!schedule_work(&conn->close_work)) + sock_put(&smc->sk); } - - /* piggy backed tx info */ - /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ - if (diff_cons && smc_tx_prepared_sends(conn)) { - smc_tx_sndbuf_nonempty(conn); - /* trigger socket release if connection closed */ - smc_close_wake_tx_prepared(smc); - } - - /* socket connected but not accepted */ - if (!smc->sk.sk_socket) - return; - - /* data available */ - if ((conn->local_rx_ctrl.prod_flags.write_blocked) || - (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) - smc_tx_consumer_update(conn); } /* called under tasklet context */ diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 149ceda1b088..ab240b37ad11 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); -bool smc_cdc_tx_has_pending(struct smc_connection *conn); int smc_cdc_init(void) __init; #endif /* SMC_CDC_H */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 511548085d16..8ac51583a063 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -22,6 +22,54 @@ #include "smc_clc.h" #include "smc_ib.h" +/* check if received message has a correct header length and contains valid + * heading and trailing eyecatchers + */ +static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) +{ + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_msg_proposal *pclc; + struct smc_clc_msg_decline *dclc; + struct smc_clc_msg_trail *trl; + + if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + switch (clcm->type) { + case SMC_CLC_PROPOSAL: + pclc = (struct smc_clc_msg_proposal *)clcm; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (ntohs(pclc->hdr.length) != + sizeof(*pclc) + ntohs(pclc->iparea_offset) + + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) + + sizeof(*trl)) + return false; + trl = (struct smc_clc_msg_trail *) + ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); + break; + case SMC_CLC_ACCEPT: + case SMC_CLC_CONFIRM: + clc = (struct smc_clc_msg_accept_confirm *)clcm; + if (ntohs(clc->hdr.length) != sizeof(*clc)) + return false; + trl = &clc->trl; + break; + case SMC_CLC_DECLINE: + dclc = (struct smc_clc_msg_decline *)clcm; + if (ntohs(dclc->hdr.length) != sizeof(*dclc)) + return false; + trl = &dclc->trl; + break; + default: + return false; + } + if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + return true; +} + /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. @@ -75,9 +123,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (datlen < sizeof(struct smc_clc_msg_decline)) || - (datlen > sizeof(struct smc_clc_msg_accept_confirm)) || - memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) || + (datlen > buflen) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -91,7 +137,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, krflags = MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; len = sock_recvmsg(smc->clcsock, &msg, krflags); - if (len < datlen) { + if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; @@ -135,7 +181,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) smc->sk.sk_err = EPROTO; if (len < 0) smc->sk.sk_err = -len; - return len; + return sock_error(&smc->sk); } /* send CLC PROPOSAL message across internal TCP socket */ @@ -143,33 +189,43 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport) { + struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_trail trl; int reason_code = 0; + struct kvec vec[3]; struct msghdr msg; - struct kvec vec; - int len, rc; + int len, plen, rc; /* send SMC Proposal CLC message */ + plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; - pclc.hdr.length = htons(sizeof(pclc)); + pclc.hdr.length = htons(plen); pclc.hdr.version = SMC_CLC_V1; /* SMC version */ memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); + pclc.iparea_offset = htons(0); + memset(&pclc_prfx, 0, sizeof(pclc_prfx)); /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, - &pclc.prefix_len); + rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, + &pclc_prfx.prefix_len); if (rc) return SMC_CLC_DECL_CNFERR; /* configuration error */ - memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + pclc_prfx.ipv6_prefixes_cnt = 0; + memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); - vec.iov_base = &pclc; - vec.iov_len = sizeof(pclc); + vec[0].iov_base = &pclc; + vec[0].iov_len = sizeof(pclc); + vec[1].iov_base = &pclc_prfx; + vec[1].iov_len = sizeof(pclc_prfx); + vec[2].iov_base = &trl; + vec[2].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); + len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); if (len < sizeof(pclc)) { if (len >= 0) { reason_code = -ENETUNREACH; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 12a9af1539a2..c145a0f36a68 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -44,7 +44,7 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */ #if defined(__BIG_ENDIAN_BITFIELD) u8 version : 4, flag : 1, - rsvd : 3; + rsvd : 3; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 rsvd : 3, flag : 1, @@ -62,17 +62,31 @@ struct smc_clc_msg_local { /* header2 of clc messages */ u8 mac[6]; /* mac of ib_device port */ }; -struct smc_clc_msg_proposal { /* clc proposal message */ - struct smc_clc_msg_hdr hdr; - struct smc_clc_msg_local lcl; - __be16 iparea_offset; /* offset to IP address information area */ +struct smc_clc_ipv6_prefix { + u8 prefix[4]; + u8 prefix_len; +} __packed; + +struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ __be32 outgoing_subnet; /* subnet mask */ u8 prefix_len; /* number of significant bits in mask */ u8 reserved[2]; u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */ - struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ } __aligned(4); +struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ + struct smc_clc_msg_hdr hdr; + struct smc_clc_msg_local lcl; + __be16 iparea_offset; /* offset to IP address information area */ +} __aligned(4); + +#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28 +#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix)) +#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \ + SMC_CLC_PROPOSAL_MAX_OFFSET + \ + SMC_CLC_PROPOSAL_MAX_PREFIX + \ + sizeof(struct smc_clc_msg_trail)) + struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; struct smc_clc_msg_local lcl; @@ -102,6 +116,14 @@ struct smc_clc_msg_decline { /* clc decline message */ struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ } __aligned(4); +/* determine start of the prefix area within the proposal message */ +static inline struct smc_clc_msg_proposal_prefix * +smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) +{ + return (struct smc_clc_msg_proposal_prefix *) + ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); +} + struct smc_sock; struct smc_ib_device; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 48615d2ac4aa..e339c0186dcf 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -19,7 +19,7 @@ #include "smc_cdc.h" #include "smc_close.h" -#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ) +#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) static void smc_close_cleanup_listen(struct sock *parent) { @@ -30,23 +30,24 @@ static void smc_close_cleanup_listen(struct sock *parent) smc_close_non_accepted(sk); } -static void smc_close_wait_tx_pends(struct smc_sock *smc) +static void smc_close_wait_listen_clcsock(struct smc_sock *smc) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = &smc->sk; signed long timeout; - timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME; + timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME; add_wait_queue(sk_sleep(sk), &wait); - while (!signal_pending(current) && timeout) { - int rc; - - rc = sk_wait_event(sk, &timeout, - !smc_cdc_tx_has_pending(&smc->conn), - &wait); - if (rc) + do { + release_sock(sk); + if (smc->clcsock) + timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE, + timeout); + sched_annotate_sleep(); + lock_sock(sk); + if (!smc->clcsock) break; - } + } while (timeout); remove_wait_queue(sk_sleep(sk), &wait); } @@ -111,58 +112,63 @@ static int smc_close_abort(struct smc_connection *conn) } /* terminate smc socket abnormally - active abort - * RDMA communication no longer possible + * link group is terminated, i.e. RDMA communication no longer possible */ -void smc_close_active_abort(struct smc_sock *smc) +static void smc_close_active_abort(struct smc_sock *smc) { + struct sock *sk = &smc->sk; + struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - smc->sk.sk_err = ECONNABORTED; + sk->sk_err = ECONNABORTED; if (smc->clcsock && smc->clcsock->sk) { smc->clcsock->sk->sk_err = ECONNABORTED; smc->clcsock->sk->sk_state_change(smc->clcsock->sk); } - switch (smc->sk.sk_state) { + switch (sk->sk_state) { case SMC_INIT: case SMC_ACTIVE: - smc->sk.sk_state = SMC_PEERABORTWAIT; + sk->sk_state = SMC_PEERABORTWAIT; + release_sock(sk); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); + sock_put(sk); /* passive closing */ break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - txflags->peer_conn_abort = 1; - sock_release(smc->clcsock); if (!smc_cdc_rxed_any_close(&smc->conn)) - smc->sk.sk_state = SMC_PEERABORTWAIT; + sk->sk_state = SMC_PEERABORTWAIT; else - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; + release_sock(sk); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (!txflags->peer_conn_closed) { - smc->sk.sk_state = SMC_PEERABORTWAIT; - txflags->peer_conn_abort = 1; - sock_release(smc->clcsock); + /* just SHUTDOWN_SEND done */ + sk->sk_state = SMC_PEERABORTWAIT; } else { - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; } + sock_put(sk); /* passive closing */ break; case SMC_PROCESSABORT: case SMC_APPFINCLOSEWAIT: - if (!txflags->peer_conn_closed) { - txflags->peer_conn_abort = 1; - sock_release(smc->clcsock); - } - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; break; case SMC_PEERFINCLOSEWAIT: + sock_put(sk); /* passive closing */ + break; case SMC_PEERABORTWAIT: case SMC_CLOSED: break; } - sock_set_flag(&smc->sk, SOCK_DEAD); - smc->sk.sk_state_change(&smc->sk); + sock_set_flag(sk, SOCK_DEAD); + sk->sk_state_change(sk); } static inline bool smc_close_sent_any_close(struct smc_connection *conn) @@ -185,13 +191,11 @@ int smc_close_active(struct smc_sock *smc) 0 : sock_flag(sk, SOCK_LINGER) ? sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; -again: old_state = sk->sk_state; - switch (old_state) { +again: + switch (sk->sk_state) { case SMC_INIT: sk->sk_state = SMC_CLOSED; - if (smc->smc_listen_work.func) - cancel_work_sync(&smc->smc_listen_work); break; case SMC_LISTEN: sk->sk_state = SMC_CLOSED; @@ -200,11 +204,9 @@ again: rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); /* wake up kernel_accept of smc_tcp_listen_worker */ smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); + smc_close_wait_listen_clcsock(smc); } - release_sock(sk); smc_close_cleanup_listen(sk); - cancel_work_sync(&smc->smc_listen_work); - lock_sock(sk); break; case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); @@ -214,6 +216,8 @@ again: if (sk->sk_state == SMC_ACTIVE) { /* send close request */ rc = smc_close_final(conn); + if (rc) + break; sk->sk_state = SMC_PEERCLOSEWAIT1; } else { /* peer event has changed the state */ @@ -226,9 +230,10 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); + if (rc) + break; } sk->sk_state = SMC_CLOSED; - smc_close_wait_tx_pends(smc); break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: @@ -237,19 +242,21 @@ again: release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); - if (sk->sk_err != ECONNABORTED) { - /* confirm close from peer */ - rc = smc_close_final(conn); - if (rc) - break; - } - if (smc_cdc_rxed_any_close(conn)) + if (sk->sk_state != SMC_APPCLOSEWAIT1 && + sk->sk_state != SMC_APPCLOSEWAIT2) + goto again; + /* confirm close from peer */ + rc = smc_close_final(conn); + if (rc) + break; + if (smc_cdc_rxed_any_close(conn)) { /* peer has closed the socket already */ sk->sk_state = SMC_CLOSED; - else + sock_put(sk); /* postponed passive closing */ + } else { /* peer has just issued a shutdown write */ sk->sk_state = SMC_PEERFINCLOSEWAIT; - smc_close_wait_tx_pends(smc); + } break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: @@ -257,6 +264,8 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); + if (rc) + break; } /* peer sending PeerConnectionClosed will cause transition */ break; @@ -264,12 +273,8 @@ again: /* peer sending PeerConnectionClosed will cause transition */ break; case SMC_PROCESSABORT: - release_sock(sk); - cancel_delayed_work_sync(&conn->tx_work); - lock_sock(sk); smc_close_abort(conn); sk->sk_state = SMC_CLOSED; - smc_close_wait_tx_pends(smc); break; case SMC_PEERABORTWAIT: case SMC_CLOSED: @@ -278,7 +283,7 @@ again: } if (old_state != sk->sk_state) - sk->sk_state_change(&smc->sk); + sk->sk_state_change(sk); return rc; } @@ -289,37 +294,42 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) struct sock *sk = &smc->sk; switch (sk->sk_state) { + case SMC_INIT: case SMC_ACTIVE: - case SMC_APPFINCLOSEWAIT: case SMC_APPCLOSEWAIT1: - case SMC_APPCLOSEWAIT2: - smc_close_abort(&smc->conn); + sk->sk_state = SMC_PROCESSABORT; + sock_put(sk); /* passive closing */ + break; + case SMC_APPFINCLOSEWAIT: sk->sk_state = SMC_PROCESSABORT; break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (txflags->peer_done_writing && - !smc_close_sent_any_close(&smc->conn)) { + !smc_close_sent_any_close(&smc->conn)) /* just shutdown, but not yet closed locally */ - smc_close_abort(&smc->conn); sk->sk_state = SMC_PROCESSABORT; - } else { + else sk->sk_state = SMC_CLOSED; - } + sock_put(sk); /* passive closing */ break; + case SMC_APPCLOSEWAIT2: case SMC_PEERFINCLOSEWAIT: + sk->sk_state = SMC_CLOSED; + sock_put(sk); /* passive closing */ + break; case SMC_PEERABORTWAIT: sk->sk_state = SMC_CLOSED; break; - case SMC_INIT: case SMC_PROCESSABORT: /* nothing to do, add tracing in future patch */ break; } } -/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, - * or peer_done_writing. +/* Either some kind of closing has been received: peer_conn_closed, + * peer_conn_abort, or peer_done_writing + * or the link group of the connection terminates abnormally. */ static void smc_close_passive_work(struct work_struct *work) { @@ -331,7 +341,7 @@ static void smc_close_passive_work(struct work_struct *work) struct sock *sk = &smc->sk; int old_state; - lock_sock(&smc->sk); + lock_sock(sk); old_state = sk->sk_state; if (!conn->alert_token_local) { @@ -340,23 +350,32 @@ static void smc_close_passive_work(struct work_struct *work) goto wakeup; } - rxflags = &smc->conn.local_rx_ctrl.conn_state_flags; + rxflags = &conn->local_rx_ctrl.conn_state_flags; if (rxflags->peer_conn_abort) { + /* peer has not received all data */ smc_close_passive_abort_received(smc); + release_sock(&smc->sk); + cancel_delayed_work_sync(&conn->tx_work); + lock_sock(&smc->sk); goto wakeup; } switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&smc->conn.bytes_to_rcv) || + if (atomic_read(&conn->bytes_to_rcv) || (rxflags->peer_done_writing && - !smc_cdc_rxed_any_close(conn))) + !smc_cdc_rxed_any_close(conn))) { sk->sk_state = SMC_APPCLOSEWAIT1; - else + } else { sk->sk_state = SMC_CLOSED; + sock_put(sk); /* passive closing */ + } break; case SMC_ACTIVE: sk->sk_state = SMC_APPCLOSEWAIT1; + /* postpone sock_put() for passive closing to cover + * received SEND_SHUTDOWN as well + */ break; case SMC_PEERCLOSEWAIT1: if (rxflags->peer_done_writing) @@ -364,8 +383,7 @@ static void smc_close_passive_work(struct work_struct *work) /* fall through */ /* to check for closing */ case SMC_PEERCLOSEWAIT2: - case SMC_PEERFINCLOSEWAIT: - if (!smc_cdc_rxed_any_close(&smc->conn)) + if (!smc_cdc_rxed_any_close(conn)) break; if (sock_flag(sk, SOCK_DEAD) && smc_close_sent_any_close(conn)) { @@ -375,9 +393,20 @@ static void smc_close_passive_work(struct work_struct *work) /* just shutdown, but not yet closed locally */ sk->sk_state = SMC_APPFINCLOSEWAIT; } + sock_put(sk); /* passive closing */ + break; + case SMC_PEERFINCLOSEWAIT: + if (smc_cdc_rxed_any_close(conn)) { + sk->sk_state = SMC_CLOSED; + sock_put(sk); /* passive closing */ + } break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: + /* postpone sock_put() for passive closing to cover + * received SEND_SHUTDOWN as well + */ + break; case SMC_APPFINCLOSEWAIT: case SMC_PEERABORTWAIT: case SMC_PROCESSABORT: @@ -393,23 +422,11 @@ wakeup: if (old_state != sk->sk_state) { sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { - smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); - } + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) + smc_conn_free(conn); } - release_sock(&smc->sk); -} - -void smc_close_sock_put_work(struct work_struct *work) -{ - struct smc_sock *smc = container_of(to_delayed_work(work), - struct smc_sock, - sock_put_work); - - smc->sk.sk_prot->unhash(&smc->sk); - sock_put(&smc->sk); + release_sock(sk); + sock_put(sk); /* sock_hold done by schedulers of close_work */ } int smc_close_shutdown_write(struct smc_sock *smc) @@ -424,20 +441,21 @@ int smc_close_shutdown_write(struct smc_sock *smc) 0 : sock_flag(sk, SOCK_LINGER) ? sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; -again: old_state = sk->sk_state; - switch (old_state) { +again: + switch (sk->sk_state) { case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); + if (sk->sk_state != SMC_ACTIVE) + goto again; /* send close wr request */ rc = smc_close_wr(conn); - if (sk->sk_state == SMC_ACTIVE) - sk->sk_state = SMC_PEERCLOSEWAIT1; - else - goto again; + if (rc) + break; + sk->sk_state = SMC_PEERCLOSEWAIT1; break; case SMC_APPCLOSEWAIT1: /* passive close */ @@ -446,8 +464,12 @@ again: release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); + if (sk->sk_state != SMC_APPCLOSEWAIT1) + goto again; /* confirm close from peer */ rc = smc_close_wr(conn); + if (rc) + break; sk->sk_state = SMC_APPCLOSEWAIT2; break; case SMC_APPCLOSEWAIT2: @@ -462,7 +484,7 @@ again: } if (old_state != sk->sk_state) - sk->sk_state_change(&smc->sk); + sk->sk_state_change(sk); return rc; } diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index ed82506b1b0a..19eb6a211c23 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -20,9 +20,7 @@ #define SMC_CLOSE_SOCK_PUT_DELAY HZ void smc_close_wake_tx_prepared(struct smc_sock *smc); -void smc_close_active_abort(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); -void smc_close_sock_put_work(struct work_struct *work); int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 94f21116dac5..2424c7100aaf 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work) bool conns; spin_lock_bh(&smc_lgr_list.lock); + if (list_empty(&lgr->list)) + goto free; read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work) return; } list_del_init(&lgr->list); /* remove from smc_lgr_list */ +free: spin_unlock_bh(&smc_lgr_list.lock); smc_lgr_free(lgr); } @@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn) /* remove a finished connection from its link group */ void smc_conn_free(struct smc_connection *conn) { - struct smc_link_group *lgr = conn->lgr; - - if (!lgr) + if (!conn->lgr) return; smc_cdc_tx_dismiss_slots(conn); smc_lgr_unregister_conn(conn); @@ -327,13 +328,17 @@ void smc_lgr_terminate(struct smc_link_group *lgr) while (node) { conn = rb_entry(node, struct smc_connection, alert_node); smc = container_of(conn, struct smc_sock, conn); - sock_hold(&smc->sk); + sock_hold(&smc->sk); /* sock_put in close work */ + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); - schedule_work(&conn->close_work); - sock_put(&smc->sk); + write_unlock_bh(&lgr->conns_lock); + if (!schedule_work(&conn->close_work)) + sock_put(&smc->sk); + write_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); } write_unlock_bh(&lgr->conns_lock); + wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); } /* Determine vlan of internal TCP socket. diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d2d01cf70224..427b91c1c964 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) goto errout; - if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) { + if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && + smc->conn.alert_token_local) { struct smc_connection *conn = &smc->conn; struct smc_diag_conninfo cinfo = { .token = conn->alert_token_local, @@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, goto errout; } - if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) { + if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr && + !list_empty(&smc->conn.lgr->list)) { struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 90f1a7f9085c..2a8957bd6d38 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -141,6 +141,17 @@ out: return rc; } +static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct smc_link_group *lgr, *l; + + list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { + if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && + lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) + smc_lgr_terminate(lgr); + } +} + /* process context wrapper for might_sleep smc_ib_remember_port_attr */ static void smc_ib_port_event_work(struct work_struct *work) { @@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work) for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { smc_ib_remember_port_attr(smcibdev, port_idx + 1); clear_bit(port_idx, &smcibdev->port_event_mask); + if (!smc_ib_port_active(smcibdev, port_idx + 1)) + smc_ib_port_terminate(smcibdev, port_idx + 1); } } @@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, switch (ibevent->event) { case IB_EVENT_PORT_ERR: - port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); - /* fall through */ case IB_EVENT_DEVICE_FATAL: - /* tbd in follow-on patch: - * abnormal close of corresponding connections - */ - break; case IB_EVENT_PORT_ACTIVE: port_idx = ibevent->element.port_num - 1; set_bit(port_idx, &smcibdev->port_event_mask); @@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, void smc_ib_dealloc_protection_domain(struct smc_link *lnk) { - ib_dealloc_pd(lnk->roce_pd); + if (lnk->roce_pd) + ib_dealloc_pd(lnk->roce_pd); lnk->roce_pd = NULL; } @@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { + struct smc_ib_device *smcibdev = + (struct smc_ib_device *)ibevent->device; + u8 port_idx; + switch (ibevent->event) { case IB_EVENT_DEVICE_FATAL: case IB_EVENT_GID_CHANGE: case IB_EVENT_PORT_ERR: case IB_EVENT_QP_ACCESS_ERR: - /* tbd in follow-on patch: - * abnormal close of corresponding connections - */ + port_idx = ibevent->element.port_num - 1; + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); break; default: break; @@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) void smc_ib_destroy_queue_pair(struct smc_link *lnk) { - ib_destroy_qp(lnk->roce_qp); + if (lnk->roce_qp) + ib_destroy_qp(lnk->roce_qp); lnk->roce_qp = NULL; } @@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) { if (!smcibdev->initialized) return; + smcibdev->initialized = 0; smc_wr_remove_dev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); ib_destroy_cq(smcibdev->roce_cq_recv); diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index cbf58637ee14..9dc392ca06bf 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -65,7 +65,6 @@ static int smc_rx_wait_data(struct smc_sock *smc, long *timeo) rc = sk_wait_event(sk, timeo, sk->sk_err || sk->sk_shutdown & RCV_SHUTDOWN || - sock_flag(sk, SOCK_DONE) || atomic_read(&conn->bytes_to_rcv) || smc_cdc_rxed_any_close_or_senddone(conn), &wait); @@ -116,7 +115,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, if (read_done) { if (sk->sk_err || sk->sk_state == SMC_CLOSED || - (sk->sk_shutdown & RCV_SHUTDOWN) || + sk->sk_shutdown & RCV_SHUTDOWN || !timeo || signal_pending(current) || smc_cdc_rxed_any_close_or_senddone(conn) || @@ -124,8 +123,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, peer_conn_abort) break; } else { - if (sock_flag(sk, SOCK_DONE)) - break; if (sk->sk_err) { read_done = sock_error(sk); break; diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index c48dc2d5fd3a..838bce20c361 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) rc = -EPIPE; break; } - if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { + if (smc_cdc_rxed_any_close(conn)) { rc = -ECONNRESET; break; } @@ -104,14 +104,12 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) if (atomic_read(&conn->sndbuf_space)) break; /* at least 1 byte of free space available */ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - sk->sk_write_pending++; sk_wait_event(sk, &timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || - smc_cdc_rxed_any_close_or_senddone(conn) || + smc_cdc_rxed_any_close(conn) || atomic_read(&conn->sndbuf_space), &wait); - sk->sk_write_pending--; } remove_wait_queue(sk_sleep(sk), &wait); return rc; @@ -250,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, peer_rmbe_offset; rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr); - if (rc) + if (rc) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + smc_lgr_terminate(lgr); + } return rc; } @@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) goto out_unlock; } rc = 0; - schedule_delayed_work(&conn->tx_work, - SMC_TX_WORK_DELAY); + if (conn->alert_token_local) /* connection healthy */ + schedule_delayed_work(&conn->tx_work, + SMC_TX_WORK_DELAY); } goto out_unlock; } @@ -440,19 +441,24 @@ static void smc_tx_work(struct work_struct *work) int rc; lock_sock(&smc->sk); + if (smc->sk.sk_err || + !conn->alert_token_local || + conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) + goto out; + rc = smc_tx_sndbuf_nonempty(conn); if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) conn->local_rx_ctrl.prod_flags.write_blocked = 0; + +out: release_sock(&smc->sk); } void smc_tx_consumer_update(struct smc_connection *conn) { union smc_host_cursor cfed, cons; - struct smc_cdc_tx_pend *pend; - struct smc_wr_buf *wr_buf; - int to_confirm, rc; + int to_confirm; smc_curs_write(&cons, smc_curs_read(&conn->local_tx_ctrl.cons, conn), @@ -466,10 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); - if (!rc) - rc = smc_cdc_msg_send(conn, wr_buf, pend); - if (rc < 0) { + if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && + conn->alert_token_local) { /* connection healthy */ schedule_delayed_work(&conn->tx_work, SMC_TX_WORK_DELAY); return; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index de4537f66832..1b8af23e6e2b 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data) again: polled++; do { + memset(&wc, 0, sizeof(wc)); rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc); if (polled == 1) { ib_req_notify_cq(dev->roce_cq_send, @@ -173,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, struct smc_wr_tx_pend_priv **wr_pend_priv) { struct smc_wr_tx_pend *wr_pend; + u32 idx = link->wr_tx_cnt; struct ib_send_wr *wr_ib; u64 wr_id; - u32 idx; int rc; *wr_buf = NULL; @@ -185,21 +186,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, if (rc) return rc; } else { - rc = wait_event_interruptible_timeout( + struct smc_link_group *lgr; + + lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + rc = wait_event_timeout( link->wr_tx_wait, + list_empty(&lgr->list) || /* lgr terminated */ (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); if (!rc) { /* timeout - terminate connections */ - struct smc_link_group *lgr; - - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); smc_lgr_terminate(lgr); return -EPIPE; } - if (rc == -ERESTARTSYS) - return -EINTR; if (idx == link->wr_tx_cnt) return -EPIPE; } @@ -249,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) pend = container_of(priv, struct smc_wr_tx_pend, priv); rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], &failed_wr); - if (rc) + if (rc) { + struct smc_link_group *lgr = + container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + smc_wr_tx_put_slot(link, priv); + smc_lgr_terminate(lgr); + } return rc; } @@ -300,18 +306,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, +void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, unsigned long data) { struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_rx; + struct smc_wr_rx_hdr *wr_tx; int i; for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i]; - if (wr_rx->type != wr_rx_hdr_type) + wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; + if (wr_tx->type != wr_tx_hdr_type) continue; tx_pend = &link->wr_tx_pends[i].priv; if (filter(tx_pend, data)) @@ -319,24 +325,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, } } -bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_rx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i]; - if (wr_rx->type != wr_rx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - return true; - } - return false; -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 2acf12b06063..ef0c3494c9cb 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link, int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); -bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, unsigned long data); void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, diff --git a/net/socket.c b/net/socket.c index 2f378449bc1b..a93c99b518ca 100644 --- a/net/socket.c +++ b/net/socket.c @@ -163,12 +163,6 @@ static DEFINE_SPINLOCK(net_family_lock); static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; /* - * Statistics counters of the socket lists - */ - -static DEFINE_PER_CPU(int, sockets_in_use); - -/* * Support routines. * Move socket addresses back and forth across the kernel/user * divide and look after the messy bits. @@ -580,7 +574,6 @@ struct socket *sock_alloc(void) inode->i_gid = current_fsgid(); inode->i_op = &sockfs_inode_ops; - this_cpu_add(sockets_in_use, 1); return sock; } EXPORT_SYMBOL(sock_alloc); @@ -607,7 +600,6 @@ void sock_release(struct socket *sock) if (rcu_dereference_protected(sock->wq, 1)->fasync_list) pr_err("%s: fasync list not empty!\n", __func__); - this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); return; @@ -969,9 +961,28 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * If this ioctl is unknown try to hand it down * to the NIC driver. */ - if (err == -ENOIOCTLCMD) - err = dev_ioctl(net, cmd, argp); + if (err != -ENOIOCTLCMD) + return err; + if (cmd == SIOCGIFCONF) { + struct ifconf ifc; + if (copy_from_user(&ifc, argp, sizeof(struct ifconf))) + return -EFAULT; + rtnl_lock(); + err = dev_ifconf(net, &ifc, sizeof(struct ifreq)); + rtnl_unlock(); + if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) + err = -EFAULT; + } else { + struct ifreq ifr; + bool need_copyout; + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } return err; } @@ -996,12 +1007,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) sock = file->private_data; sk = sock->sk; net = sock_net(sk); - if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(net, cmd, argp); + if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) { + struct ifreq ifr; + bool need_copyout; + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + return -EFAULT; } else #ifdef CONFIG_WEXT_CORE if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(net, cmd, argp); + err = wext_handle_ioctl(net, cmd, argp); } else #endif switch (cmd) { @@ -2621,29 +2639,11 @@ out_fs: core_initcall(sock_init); /* early initcall */ -static int __init jit_init(void) -{ -#ifdef CONFIG_BPF_JIT_ALWAYS_ON - bpf_jit_enable = 1; -#endif - return 0; -} -pure_initcall(jit_init); - #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { - int cpu; - int counter = 0; - - for_each_possible_cpu(cpu) - counter += per_cpu(sockets_in_use, cpu); - - /* It can be negative, by the way. 8) */ - if (counter < 0) - counter = 0; - - seq_printf(seq, "sockets: used %d\n", counter); + seq_printf(seq, "sockets: used %d\n", + sock_inuse_get(seq->private)); } #endif /* CONFIG_PROC_FS */ @@ -2680,89 +2680,25 @@ static int do_siocgstampns(struct net *net, struct socket *sock, return err; } -static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) -{ - struct ifreq __user *uifr; - int err; - - uifr = compat_alloc_user_space(sizeof(struct ifreq)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - err = dev_ioctl(net, SIOCGIFNAME, uifr); - if (err) - return err; - - if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq))) - return -EFAULT; - - return 0; -} - -static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) +static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) { struct compat_ifconf ifc32; struct ifconf ifc; - struct ifconf __user *uifc; - struct compat_ifreq __user *ifr32; - struct ifreq __user *ifr; - unsigned int i, j; int err; if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT; - memset(&ifc, 0, sizeof(ifc)); - if (ifc32.ifcbuf == 0) { - ifc32.ifc_len = 0; - ifc.ifc_len = 0; - ifc.ifc_req = NULL; - uifc = compat_alloc_user_space(sizeof(struct ifconf)); - } else { - size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * - sizeof(struct ifreq); - uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); - ifc.ifc_len = len; - ifr = ifc.ifc_req = (void __user *)(uifc + 1); - ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { - if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - ifr++; - ifr32++; - } - } - if (copy_to_user(uifc, &ifc, sizeof(struct ifconf))) - return -EFAULT; + ifc.ifc_len = ifc32.ifc_len; + ifc.ifc_req = compat_ptr(ifc32.ifcbuf); - err = dev_ioctl(net, SIOCGIFCONF, uifc); + rtnl_lock(); + err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); + rtnl_unlock(); if (err) return err; - if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) - return -EFAULT; - - ifr = ifc.ifc_req; - ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0, j = 0; - i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) - return -EFAULT; - ifr32++; - ifr++; - } - - if (ifc32.ifcbuf == 0) { - /* Translate from 64-bit structure multiple to - * a 32-bit one. - */ - i = ifc.ifc_len; - i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq)); - ifc32.ifc_len = i; - } else { - ifc32.ifc_len = i; - } + ifc32.ifc_len = ifc.ifc_len; if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) return -EFAULT; @@ -2773,9 +2709,9 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) { struct compat_ethtool_rxnfc __user *compat_rxnfc; bool convert_in = false, convert_out = false; - size_t buf_size = ALIGN(sizeof(struct ifreq), 8); - struct ethtool_rxnfc __user *rxnfc; - struct ifreq __user *ifr; + size_t buf_size = 0; + struct ethtool_rxnfc __user *rxnfc = NULL; + struct ifreq ifr; u32 rule_cnt = 0, actual_rule_cnt; u32 ethcmd; u32 data; @@ -2812,18 +2748,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) case ETHTOOL_SRXCLSRLDEL: buf_size += sizeof(struct ethtool_rxnfc); convert_in = true; + rxnfc = compat_alloc_user_space(buf_size); break; } - ifr = compat_alloc_user_space(buf_size); - rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8); - - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; - if (put_user(convert_in ? rxnfc : compat_ptr(data), - &ifr->ifr_ifru.ifru_data)) - return -EFAULT; + ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc; if (convert_in) { /* We expect there to be holes between fs.m_ext and @@ -2851,7 +2783,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) return -EFAULT; } - ret = dev_ioctl(net, SIOCETHTOOL, ifr); + ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); if (ret) return ret; @@ -2892,113 +2824,43 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) { - void __user *uptr; compat_uptr_t uptr32; - struct ifreq __user *uifr; + struct ifreq ifr; + void __user *saved; + int err; - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) + if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) return -EFAULT; - uptr = compat_ptr(uptr32); - - if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) - return -EFAULT; - - return dev_ioctl(net, SIOCWANDEV, uifr); -} - -static int bond_ioctl(struct net *net, unsigned int cmd, - struct compat_ifreq __user *ifr32) -{ - struct ifreq kifr; - mm_segment_t old_fs; - int err; + saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc; + ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32); - switch (cmd) { - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, - (struct ifreq __user __force *) &kifr); - set_fs(old_fs); - - return err; - default: - return -ENOIOCTLCMD; + err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); + if (!err) { + ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; + if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) + err = -EFAULT; } + return err; } /* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { - struct ifreq __user *u_ifreq64; - char tmp_buf[IFNAMSIZ]; - void __user *data64; + struct ifreq ifreq; u32 data32; - if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), - IFNAMSIZ)) + if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) return -EFAULT; - if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) + if (get_user(data32, &u_ifreq32->ifr_data)) return -EFAULT; - data64 = compat_ptr(data32); + ifreq.ifr_data = compat_ptr(data32); - u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); - - if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], - IFNAMSIZ)) - return -EFAULT; - if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) - return -EFAULT; - - return dev_ioctl(net, cmd, u_ifreq64); -} - -static int dev_ifsioc(struct net *net, struct socket *sock, - unsigned int cmd, struct compat_ifreq __user *uifr32) -{ - struct ifreq __user *uifr; - int err; - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) - return -EFAULT; - - err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); - - if (!err) { - switch (cmd) { - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFMEM: - case SIOCGIFHWADDR: - case SIOCGIFINDEX: - case SIOCGIFADDR: - case SIOCGIFBRDADDR: - case SIOCGIFDSTADDR: - case SIOCGIFNETMASK: - case SIOCGIFPFLAGS: - case SIOCGIFTXQLEN: - case SIOCGMIIPHY: - case SIOCGMIIREG: - if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) - err = -EFAULT; - break; - } - } - return err; + return dev_ioctl(net, cmd, &ifreq, NULL); } static int compat_sioc_ifmap(struct net *net, unsigned int cmd, @@ -3006,7 +2868,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, { struct ifreq ifr; struct compat_ifmap __user *uifmap32; - mm_segment_t old_fs; int err; uifmap32 = &uifr32->ifr_ifru.ifru_map; @@ -3020,10 +2881,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, if (err) return -EFAULT; - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, (void __user __force *)&ifr); - set_fs(old_fs); + err = dev_ioctl(net, cmd, &ifr, NULL); if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); @@ -3156,10 +3014,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFBR: case SIOCGIFBR: return old_bridge_ioctl(argp); - case SIOCGIFNAME: - return dev_ifname32(net, argp); case SIOCGIFCONF: - return dev_ifconf(net, argp); + return compat_dev_ifconf(net, argp); case SIOCETHTOOL: return ethtool_ioctl(net, argp); case SIOCWANDEV: @@ -3167,11 +3023,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGIFMAP: case SIOCSIFMAP: return compat_sioc_ifmap(net, cmd, argp); - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - return bond_ioctl(net, cmd, argp); case SIOCADDRT: case SIOCDELRT: return routing_ioctl(net, sock, cmd, argp); @@ -3231,12 +3082,15 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - return dev_ifsioc(net, sock, cmd, argp); - case SIOCSARP: case SIOCGARP: case SIOCDARP: case SIOCATMARK: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + case SIOCGIFNAME: return sock_do_ioctl(net, sock, cmd, arg); } @@ -3391,19 +3245,6 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage_locked); -int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, cmd, arg); - set_fs(oldfs); - - return err; -} -EXPORT_SYMBOL(kernel_sock_ioctl); - int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 329325bd553e..37892b3909af 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, 2014-2016, Ericsson AB + * Copyright (c) 2004-2006, 2014-2017, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -42,8 +42,8 @@ #include "link.h" #include "name_table.h" -#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ -#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ +#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ +#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; @@ -74,6 +74,10 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net) return tipc_net(net)->bcbase; } +/* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link + * Note: the MTU is decremented to give room for a tunnel header, in + * case the message needs to be sent as replicast + */ int tipc_bcast_get_mtu(struct net *net) { return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE; @@ -515,7 +519,7 @@ int tipc_bcast_init(struct net *net) spin_lock_init(&tipc_net(net)->bclock); if (!tipc_link_bc_create(net, 0, 0, - U16_MAX, + FB_MTU, BCLINK_WIN_DEFAULT, 0, &bb->inputq, diff --git a/net/tipc/core.h b/net/tipc/core.h index 964342689f2c..20b21af2ff14 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -49,7 +49,6 @@ #include <linux/uaccess.h> #include <linux/interrupt.h> #include <linux/atomic.h> -#include <asm/hardirq.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/list.h> diff --git a/net/tipc/group.c b/net/tipc/group.c index 5f4ffae807ee..122162a31816 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -49,8 +49,6 @@ #define ADV_ACTIVE (ADV_UNIT * 12) enum mbr_state { - MBR_QUARANTINED, - MBR_DISCOVERED, MBR_JOINING, MBR_PUBLISHED, MBR_JOINED, @@ -64,8 +62,7 @@ enum mbr_state { struct tipc_member { struct rb_node tree_node; struct list_head list; - struct list_head congested; - struct sk_buff *event_msg; + struct list_head small_win; struct sk_buff_head deferredq; struct tipc_group *group; u32 node; @@ -77,21 +74,18 @@ struct tipc_member { u16 bc_rcv_nxt; u16 bc_syncpt; u16 bc_acked; - bool usr_pending; }; struct tipc_group { struct rb_root members; - struct list_head congested; + struct list_head small_win; struct list_head pending; struct list_head active; - struct list_head reclaiming; struct tipc_nlist dests; struct net *net; int subid; u32 type; u32 instance; - u32 domain; u32 scope; u32 portid; u16 member_cnt; @@ -99,6 +93,7 @@ struct tipc_group { u16 max_active; u16 bc_snd_nxt; u16 bc_ackers; + bool *open; bool loopback; bool events; }; @@ -106,6 +101,16 @@ struct tipc_group { static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq); +static void tipc_group_open(struct tipc_member *m, bool *wakeup) +{ + *wakeup = false; + if (list_empty(&m->small_win)) + return; + list_del_init(&m->small_win); + *m->group->open = true; + *wakeup = true; +} + static void tipc_group_decr_active(struct tipc_group *grp, struct tipc_member *m) { @@ -137,14 +142,14 @@ u16 tipc_group_bc_snd_nxt(struct tipc_group *grp) return grp->bc_snd_nxt; } -static bool tipc_group_is_enabled(struct tipc_member *m) +static bool tipc_group_is_receiver(struct tipc_member *m) { - return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING; + return m && m->state != MBR_JOINING && m->state != MBR_LEAVING; } -static bool tipc_group_is_receiver(struct tipc_member *m) +static bool tipc_group_is_sender(struct tipc_member *m) { - return m && m->state >= MBR_JOINED; + return m && m->state != MBR_JOINING && m->state != MBR_PUBLISHED; } u32 tipc_group_exclude(struct tipc_group *grp) @@ -160,8 +165,11 @@ int tipc_group_size(struct tipc_group *grp) } struct tipc_group *tipc_group_create(struct net *net, u32 portid, - struct tipc_group_req *mreq) + struct tipc_group_req *mreq, + bool *group_is_open) { + u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS; + bool global = mreq->scope != TIPC_NODE_SCOPE; struct tipc_group *grp; u32 type = mreq->type; @@ -169,25 +177,41 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, if (!grp) return NULL; tipc_nlist_init(&grp->dests, tipc_own_addr(net)); - INIT_LIST_HEAD(&grp->congested); + INIT_LIST_HEAD(&grp->small_win); INIT_LIST_HEAD(&grp->active); INIT_LIST_HEAD(&grp->pending); - INIT_LIST_HEAD(&grp->reclaiming); grp->members = RB_ROOT; grp->net = net; grp->portid = portid; - grp->domain = addr_domain(net, mreq->scope); grp->type = type; grp->instance = mreq->instance; grp->scope = mreq->scope; grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; - if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid)) + grp->open = group_is_open; + filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; + if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, + filter, &grp->subid)) return grp; kfree(grp); return NULL; } +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + skb_queue_head_init(&xmitq); + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq); + tipc_group_update_member(m, 0); + } + tipc_node_distr_xmit(net, &xmitq); + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); +} + void tipc_group_delete(struct net *net, struct tipc_group *grp) { struct rb_root *tree = &grp->members; @@ -233,7 +257,7 @@ static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp, struct tipc_member *m; m = tipc_group_find_member(grp, node, port); - if (m && tipc_group_is_enabled(m)) + if (m && tipc_group_is_receiver(m)) return m; return NULL; } @@ -278,7 +302,7 @@ static void tipc_group_add_to_tree(struct tipc_group *grp, static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, u32 node, u32 port, - int state) + u32 instance, int state) { struct tipc_member *m; @@ -286,11 +310,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, if (!m) return NULL; INIT_LIST_HEAD(&m->list); - INIT_LIST_HEAD(&m->congested); + INIT_LIST_HEAD(&m->small_win); __skb_queue_head_init(&m->deferredq); m->group = grp; m->node = node; m->port = port; + m->instance = instance; m->bc_acked = grp->bc_snd_nxt - 1; grp->member_cnt++; tipc_group_add_to_tree(grp, m); @@ -299,9 +324,10 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, return m; } -void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port) +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance) { - tipc_group_create_member(grp, node, port, MBR_DISCOVERED); + tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED); } static void tipc_group_delete_member(struct tipc_group *grp, @@ -315,7 +341,7 @@ static void tipc_group_delete_member(struct tipc_group *grp, grp->bc_ackers--; list_del_init(&m->list); - list_del_init(&m->congested); + list_del_init(&m->small_win); tipc_group_decr_active(grp, m); /* If last member on a node, remove node from dest list */ @@ -344,7 +370,7 @@ void tipc_group_update_member(struct tipc_member *m, int len) struct tipc_group *grp = m->group; struct tipc_member *_m, *tmp; - if (!tipc_group_is_enabled(m)) + if (!tipc_group_is_receiver(m)) return; m->window -= len; @@ -352,16 +378,14 @@ void tipc_group_update_member(struct tipc_member *m, int len) if (m->window >= ADV_IDLE) return; - list_del_init(&m->congested); + list_del_init(&m->small_win); - /* Sort member into congested members' list */ - list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { - if (m->window > _m->window) - continue; - list_add_tail(&m->congested, &_m->congested); - return; + /* Sort member into small_window members' list */ + list_for_each_entry_safe(_m, tmp, &grp->small_win, small_win) { + if (_m->window > m->window) + break; } - list_add_tail(&m->congested, &grp->congested); + list_add_tail(&m->small_win, &_m->small_win); } void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) @@ -373,7 +397,7 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) for (n = rb_first(&grp->members); n; n = rb_next(n)) { m = container_of(n, struct tipc_member, tree_node); - if (tipc_group_is_enabled(m)) { + if (tipc_group_is_receiver(m)) { tipc_group_update_member(m, len); m->bc_acked = prev; ackers++; @@ -394,20 +418,20 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, int adv, state; m = tipc_group_find_dest(grp, dnode, dport); - *mbr = m; - if (!m) + if (!tipc_group_is_receiver(m)) { + *mbr = NULL; return false; - if (m->usr_pending) - return true; + } + *mbr = m; + if (m->window >= len) return false; - m->usr_pending = true; + + *grp->open = false; /* If not fully advertised, do it now to prevent mutual blocking */ adv = m->advertised; state = m->state; - if (state < MBR_JOINED) - return true; if (state == MBR_JOINED && adv == ADV_IDLE) return true; if (state == MBR_ACTIVE && adv == ADV_ACTIVE) @@ -425,13 +449,14 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len) struct tipc_member *m = NULL; /* If prev bcast was replicast, reject until all receivers have acked */ - if (grp->bc_ackers) + if (grp->bc_ackers) { + *grp->open = false; return true; - - if (list_empty(&grp->congested)) + } + if (list_empty(&grp->small_win)) return false; - m = list_first_entry(&grp->congested, struct tipc_member, congested); + m = list_first_entry(&grp->small_win, struct tipc_member, small_win); if (m->window >= len) return false; @@ -486,7 +511,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, goto drop; m = tipc_group_find_member(grp, node, port); - if (!tipc_group_is_receiver(m)) + if (!tipc_group_is_sender(m)) goto drop; if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) @@ -573,24 +598,34 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, switch (m->state) { case MBR_JOINED: - /* Reclaim advertised space from least active member */ - if (!list_empty(active) && active_cnt >= reclaim_limit) { + /* First, decide if member can go active */ + if (active_cnt <= max_active) { + m->state = MBR_ACTIVE; + list_add_tail(&m->list, active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } else { + m->state = MBR_PENDING; + list_add_tail(&m->list, &grp->pending); + } + + if (active_cnt < reclaim_limit) + break; + + /* Reclaim from oldest active member, if possible */ + if (!list_empty(active)) { rm = list_first_entry(active, struct tipc_member, list); rm->state = MBR_RECLAIMING; - list_move_tail(&rm->list, &grp->reclaiming); + list_del_init(&rm->list); tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq); - } - /* If max active, become pending and wait for reclaimed space */ - if (active_cnt >= max_active) { - m->state = MBR_PENDING; - list_add_tail(&m->list, &grp->pending); break; } - /* Otherwise become active */ - m->state = MBR_ACTIVE; - list_add_tail(&m->list, &grp->active); - grp->active_cnt++; - /* Fall through */ + /* Nobody to reclaim from; - revert oldest pending to JOINED */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + list_del_init(&pm->list); + pm->state = MBR_JOINED; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + break; case MBR_ACTIVE: if (!list_is_last(&m->list, &grp->active)) list_move_tail(&m->list, &grp->active); @@ -602,12 +637,12 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, if (m->advertised > ADV_IDLE) break; m->state = MBR_JOINED; + grp->active_cnt--; if (m->advertised < ADV_IDLE) { pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); } - grp->active_cnt--; - list_del_init(&m->list); + if (list_empty(&grp->pending)) return; @@ -619,7 +654,6 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); break; case MBR_RECLAIMING: - case MBR_DISCOVERED: case MBR_JOINING: case MBR_LEAVING: default: @@ -627,6 +661,40 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, } } +static void tipc_group_create_event(struct tipc_group *grp, + struct tipc_member *m, + u32 event, u16 seqno, + struct sk_buff_head *inputq) +{ u32 dnode = tipc_own_addr(grp->net); + struct tipc_event evt; + struct sk_buff *skb; + struct tipc_msg *hdr; + + evt.event = event; + evt.found_lower = m->instance; + evt.found_upper = m->instance; + evt.port.ref = m->port; + evt.port.node = m->node; + evt.s.seq.type = grp->type; + evt.s.seq.lower = m->instance; + evt.s.seq.upper = m->instance; + + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT, + GROUP_H_SIZE, sizeof(evt), dnode, m->node, + grp->portid, m->port, 0); + if (!skb) + return; + + hdr = buf_msg(skb); + msg_set_nametype(hdr, grp->type); + msg_set_grp_evt(hdr, event); + msg_set_dest_droppable(hdr, true); + msg_set_grp_bc_seqno(hdr, seqno); + memcpy(msg_data(hdr), &evt, sizeof(evt)); + TIPC_SKB_CB(skb)->orig_member = m->instance; + __skb_queue_tail(inputq, skb); +} + static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq) { @@ -672,83 +740,73 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, u32 node = msg_orignode(hdr); u32 port = msg_origport(hdr); struct tipc_member *m, *pm; - struct tipc_msg *ehdr; u16 remitted, in_flight; if (!grp) return; + if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net)) + return; + m = tipc_group_find_member(grp, node, port); switch (msg_type(hdr)) { case GRP_JOIN_MSG: if (!m) m = tipc_group_create_member(grp, node, port, - MBR_QUARANTINED); + 0, MBR_JOINING); if (!m) return; m->bc_syncpt = msg_grp_bc_syncpt(hdr); m->bc_rcv_nxt = m->bc_syncpt; m->window += msg_adv_win(hdr); - /* Wait until PUBLISH event is received */ - if (m->state == MBR_DISCOVERED) { - m->state = MBR_JOINING; - } else if (m->state == MBR_PUBLISHED) { - m->state = MBR_JOINED; - *usr_wakeup = true; - m->usr_pending = false; - tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); - ehdr = buf_msg(m->event_msg); - msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); - __skb_queue_tail(inputq, m->event_msg); - } - list_del_init(&m->congested); + /* Wait until PUBLISH event is received if necessary */ + if (m->state != MBR_PUBLISHED) + return; + + /* Member can be taken into service */ + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); return; case GRP_LEAVE_MSG: if (!m) return; m->bc_syncpt = msg_grp_bc_syncpt(hdr); list_del_init(&m->list); - list_del_init(&m->congested); - *usr_wakeup = true; - - /* Wait until WITHDRAW event is received */ - if (m->state != MBR_LEAVING) { - tipc_group_decr_active(grp, m); - m->state = MBR_LEAVING; - return; - } - /* Otherwise deliver already received WITHDRAW event */ - ehdr = buf_msg(m->event_msg); - msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); - __skb_queue_tail(inputq, m->event_msg); + tipc_group_open(m, usr_wakeup); + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_syncpt, inputq); return; case GRP_ADV_MSG: if (!m) return; m->window += msg_adv_win(hdr); - *usr_wakeup = m->usr_pending; - m->usr_pending = false; - list_del_init(&m->congested); + tipc_group_open(m, usr_wakeup); return; case GRP_ACK_MSG: if (!m) return; m->bc_acked = msg_grp_bc_acked(hdr); if (--grp->bc_ackers) - break; + return; + list_del_init(&m->small_win); + *m->group->open = true; *usr_wakeup = true; - m->usr_pending = false; + tipc_group_update_member(m, 0); return; case GRP_RECLAIM_MSG: if (!m) return; - *usr_wakeup = m->usr_pending; - m->usr_pending = false; tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq); m->window = ADV_IDLE; + tipc_group_open(m, usr_wakeup); return; case GRP_REMIT_MSG: if (!m || m->state != MBR_RECLAIMING) @@ -763,18 +821,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, m->advertised = ADV_IDLE + in_flight; return; } - /* All messages preceding the REMIT have been read */ - if (m->advertised <= remitted) { - m->state = MBR_JOINED; - in_flight = 0; - } - /* ..and the REMIT overtaken by more messages => re-advertise */ + /* This should never happen */ if (m->advertised < remitted) - tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + pr_warn_ratelimited("Unexpected REMIT msg\n"); - m->advertised = ADV_IDLE + in_flight; + /* All messages preceding the REMIT have been read */ + m->state = MBR_JOINED; grp->active_cnt--; - list_del_init(&m->list); + m->advertised = ADV_IDLE; /* Set oldest pending member to active and advertise */ if (list_empty(&grp->pending)) @@ -796,11 +850,10 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, void tipc_group_member_evt(struct tipc_group *grp, bool *usr_wakeup, int *sk_rcvbuf, - struct sk_buff *skb, + struct tipc_msg *hdr, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { - struct tipc_msg *hdr = buf_msg(skb); struct tipc_event *evt = (void *)msg_data(hdr); u32 instance = evt->found_lower; u32 node = evt->port.node; @@ -808,89 +861,59 @@ void tipc_group_member_evt(struct tipc_group *grp, int event = evt->event; struct tipc_member *m; struct net *net; - bool node_up; u32 self; if (!grp) - goto drop; + return; net = grp->net; self = tipc_own_addr(net); if (!grp->loopback && node == self && port == grp->portid) - goto drop; - - /* Convert message before delivery to user */ - msg_set_hdr_sz(hdr, GROUP_H_SIZE); - msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); - msg_set_type(hdr, TIPC_GRP_MEMBER_EVT); - msg_set_origport(hdr, port); - msg_set_orignode(hdr, node); - msg_set_nametype(hdr, grp->type); - msg_set_grp_evt(hdr, event); + return; m = tipc_group_find_member(grp, node, port); - if (event == TIPC_PUBLISHED) { - if (!m) - m = tipc_group_create_member(grp, node, port, - MBR_DISCOVERED); - if (!m) - goto drop; - - /* Hold back event if JOIN message not yet received */ - if (m->state == MBR_DISCOVERED) { - m->event_msg = skb; - m->state = MBR_PUBLISHED; - } else { - msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - __skb_queue_tail(inputq, skb); - m->state = MBR_JOINED; - *usr_wakeup = true; - m->usr_pending = false; + switch (event) { + case TIPC_PUBLISHED: + /* Send and wait for arrival of JOIN message if necessary */ + if (!m) { + m = tipc_group_create_member(grp, node, port, instance, + MBR_PUBLISHED); + if (!m) + break; + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + break; } + + if (m->state != MBR_JOINING) + break; + + /* Member can be taken into service */ m->instance = instance; - TIPC_SKB_CB(skb)->orig_member = m->instance; + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); + tipc_group_update_member(m, 0); tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); - if (m->window < ADV_IDLE) - tipc_group_update_member(m, 0); - else - list_del_init(&m->congested); - } else if (event == TIPC_WITHDRAWN) { + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); + break; + case TIPC_WITHDRAWN: if (!m) - goto drop; - - TIPC_SKB_CB(skb)->orig_member = m->instance; + break; - *usr_wakeup = true; - m->usr_pending = false; - node_up = tipc_node_is_up(net, node); - m->event_msg = NULL; - - if (node_up) { - /* Hold back event if a LEAVE msg should be expected */ - if (m->state != MBR_LEAVING) { - m->event_msg = skb; - tipc_group_decr_active(grp, m); - m->state = MBR_LEAVING; - } else { - msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - __skb_queue_tail(inputq, skb); - } - } else { - if (m->state != MBR_LEAVING) { - tipc_group_decr_active(grp, m); - m->state = MBR_LEAVING; - msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); - } else { - msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - } - __skb_queue_tail(inputq, skb); - } + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; list_del_init(&m->list); - list_del_init(&m->congested); + tipc_group_open(m, usr_wakeup); + + /* Only send event if no LEAVE message can be expected */ + if (!tipc_node_is_up(net, node)) + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_rcv_nxt, inputq); + break; + default: + break; } *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); - return; -drop: - kfree_skb(skb); } diff --git a/net/tipc/group.h b/net/tipc/group.h index d525e1cd7de5..5996af6e9f1d 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -43,9 +43,12 @@ struct tipc_member; struct tipc_msg; struct tipc_group *tipc_group_create(struct net *net, u32 portid, - struct tipc_group_req *mreq); + struct tipc_group_req *mreq, + bool *group_is_open); +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf); void tipc_group_delete(struct net *net, struct tipc_group *grp); -void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port); +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance); struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, int *scope); @@ -54,7 +57,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup, - int *sk_rcvbuf, struct sk_buff *skb, + int *sk_rcvbuf, struct tipc_msg *hdr, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, @@ -69,5 +72,4 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); void tipc_group_update_member(struct tipc_member *m, int len); -int tipc_group_size(struct tipc_group *grp); #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index 6bce0b1117bd..2d6b2aed30e0 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -483,7 +483,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, /** * tipc_link_bc_create - create new link to be used for broadcast * @n: pointer to associated node - * @mtu: mtu to be used + * @mtu: mtu to be used initially if no peers * @window: send window to be used * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b0d07b35909d..55d8ba92291d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -251,20 +251,23 @@ bool tipc_msg_validate(struct sk_buff **_skb) * @pktmax: Max packet size that can be used * @list: Buffer or chain of buffers to be returned to caller * + * Note that the recursive call we are making here is safe, since it can + * logically go only one further level down. + * * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, - int offset, int dsz, int pktmax, struct sk_buff_head *list) +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, + int dsz, int pktmax, struct sk_buff_head *list) { int mhsz = msg_hdr_sz(mhdr); + struct tipc_msg pkthdr; int msz = mhsz + dsz; - int pktno = 1; - int pktsz; int pktrem = pktmax; - int drem = dsz; - struct tipc_msg pkthdr; struct sk_buff *skb; + int drem = dsz; + int pktno = 1; char *pktpos; + int pktsz; int rc; msg_set_size(mhdr, msz); @@ -272,8 +275,18 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, /* No fragmentation needed? */ if (likely(msz <= pktmax)) { skb = tipc_buf_acquire(msz, GFP_KERNEL); - if (unlikely(!skb)) + + /* Fall back to smaller MTU if node local message */ + if (unlikely(!skb)) { + if (pktmax != MAX_MSG_SIZE) + return -ENOMEM; + rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list); + if (rc != dsz) + return rc; + if (tipc_msg_assemble(list)) + return dsz; return -ENOMEM; + } skb_orphan(skb); __skb_queue_tail(list, skb); skb_copy_to_linear_data(skb, mhdr, mhsz); @@ -589,6 +602,30 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) return true; } +/* tipc_msg_assemble() - assemble chain of fragments into one message + */ +bool tipc_msg_assemble(struct sk_buff_head *list) +{ + struct sk_buff *skb, *tmp = NULL; + + if (skb_queue_len(list) == 1) + return true; + + while ((skb = __skb_dequeue(list))) { + skb->next = NULL; + if (tipc_buf_append(&tmp, &skb)) { + __skb_queue_tail(list, skb); + return true; + } + if (!tmp) + break; + } + __skb_queue_purge(list); + __skb_queue_head_init(list); + pr_warn("Failed do assemble buffer\n"); + return false; +} + /* tipc_msg_reassemble() - clone a buffer chain of fragments and * reassemble the clones into one message */ diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 3e4384c222f7..b4ba1b4f9ae7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -98,7 +98,7 @@ struct plist; #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) - +#define FB_MTU 3744 #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { @@ -943,6 +943,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); +bool tipc_msg_assemble(struct sk_buff_head *list); bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, struct sk_buff_head *cpy); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index b3829bcf63c7..ed0457cc99d6 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -328,7 +328,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscrp_report_overlap(s, publ->lower, publ->upper, TIPC_PUBLISHED, publ->ref, - publ->node, created_subseq); + publ->node, publ->scope, + created_subseq); } return publ; } @@ -398,19 +399,21 @@ found: list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscrp_report_overlap(s, publ->lower, publ->upper, TIPC_WITHDRAWN, publ->ref, - publ->node, removed_subseq); + publ->node, publ->scope, + removed_subseq); } return publ; } /** - * tipc_nameseq_subscribe - attach a subscription, and issue - * the prescribed number of events if there is any sub- + * tipc_nameseq_subscribe - attach a subscription, and optionally + * issue the prescribed number of events if there is any sub- * sequence overlapping with the requested sequence */ static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s) + struct tipc_subscription *s, + bool status) { struct sub_seq *sseq = nseq->sseqs; struct tipc_name_seq ns; @@ -420,7 +423,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, tipc_subscrp_get(s); list_add(&s->nameseq_list, &nseq->subscriptions); - if (!sseq) + if (!status || !sseq) return; while (sseq != &nseq->sseqs[nseq->first_free]) { @@ -434,6 +437,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, sseq->upper, TIPC_PUBLISHED, crs->ref, crs->node, + crs->scope, must_report); must_report = 0; } @@ -597,7 +601,7 @@ not_found: return ref; } -bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, struct list_head *dsts, int *dstcnt, u32 exclude, bool all) { @@ -607,9 +611,6 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, struct name_seq *seq; struct sub_seq *sseq; - if (!tipc_in_scope(domain, self)) - return false; - *dstcnt = 0; rcu_read_lock(); seq = nametbl_find_seq(net, type); @@ -620,7 +621,7 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, if (likely(sseq)) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { - if (!tipc_in_scope(domain, publ->node)) + if (publ->scope != scope) continue; if (publ->ref == exclude && publ->node == self) continue; @@ -638,13 +639,14 @@ exit: return !list_empty(dsts); } -int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct list_head *dports) +int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports) { - struct name_seq *seq; - struct sub_seq *sseq; struct sub_seq *sseq_stop; struct name_info *info; + struct publication *p; + struct name_seq *seq; + struct sub_seq *sseq; int res = 0; rcu_read_lock(); @@ -656,15 +658,12 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); sseq_stop = seq->sseqs + seq->first_free; for (; sseq != sseq_stop; sseq++) { - struct publication *publ; - if (sseq->lower > upper) break; - info = sseq->info; - list_for_each_entry(publ, &info->node_list, node_list) { - if (publ->scope <= limit) - tipc_dest_push(dports, 0, publ->ref); + list_for_each_entry(p, &info->node_list, node_list) { + if (p->scope == scope || (!exact && p->scope < scope)) + tipc_dest_push(dports, 0, p->ref); } if (info->cluster_list_size != info->node_list_size) @@ -681,8 +680,7 @@ exit: * - Determines if any node local ports overlap */ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, u32 domain, - struct tipc_nlist *nodes) + u32 upper, struct tipc_nlist *nodes) { struct sub_seq *sseq, *stop; struct publication *publ; @@ -700,8 +698,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { - if (tipc_in_scope(domain, publ->node)) - tipc_nlist_add(nodes, publ->node); + tipc_nlist_add(nodes, publ->node); } } spin_unlock_bh(&seq->lock); @@ -712,7 +709,7 @@ exit: /* tipc_nametbl_build_group - build list of communication group members */ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, - u32 type, u32 domain) + u32 type, u32 scope) { struct sub_seq *sseq, *stop; struct name_info *info; @@ -730,9 +727,9 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, for (; sseq != stop; sseq++) { info = sseq->info; list_for_each_entry(p, &info->zone_list, zone_list) { - if (!tipc_in_scope(domain, p->node)) + if (p->scope != scope) continue; - tipc_group_add_member(grp, p->node, p->ref); + tipc_group_add_member(grp, p->node, p->ref, p->lower); } } spin_unlock_bh(&seq->lock); @@ -811,7 +808,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, /** * tipc_nametbl_subscribe - add a subscription object to the name table */ -void tipc_nametbl_subscribe(struct tipc_subscription *s) +void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status) { struct tipc_net *tn = net_generic(s->net, tipc_net_id); u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); @@ -825,7 +822,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); - tipc_nameseq_subscribe(seq, s); + tipc_nameseq_subscribe(seq, s, status); spin_unlock_bh(&seq->lock); } else { tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 71926e429446..f56e7cb3d436 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -100,13 +100,12 @@ struct name_table { int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct list_head *dports); +int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports); void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, u32 domain, - struct tipc_nlist *nodes); + u32 upper, struct tipc_nlist *nodes); bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, struct list_head *dsts, int *dstcnt, u32 exclude, bool all); @@ -121,7 +120,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, u32 lower, u32 node, u32 ref, u32 key); -void tipc_nametbl_subscribe(struct tipc_subscription *s); +void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); void tipc_nametbl_stop(struct net *net); diff --git a/net/tipc/server.c b/net/tipc/server.c index 78a292a84afc..df0c563c90cd 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -132,10 +132,11 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) spin_lock_bh(&s->idr_lock); con = idr_find(&s->conn_idr, conid); - if (con && test_bit(CF_CONNECTED, &con->flags)) - conn_get(con); - else - con = NULL; + if (con) { + if (!test_bit(CF_CONNECTED, &con->flags) || + !kref_get_unless_zero(&con->kref)) + con = NULL; + } spin_unlock_bh(&s->idr_lock); return con; } @@ -183,35 +184,28 @@ static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } -static void tipc_unregister_callbacks(struct tipc_conn *con) -{ - struct sock *sk = con->sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - sk->sk_user_data = NULL; - write_unlock_bh(&sk->sk_callback_lock); -} - static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; + struct sock *sk = con->sock->sk; + bool disconnect = false; - if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - if (con->sock) - tipc_unregister_callbacks(con); - + write_lock_bh(&sk->sk_callback_lock); + disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags); + if (disconnect) { + sk->sk_user_data = NULL; if (con->conid) s->tipc_conn_release(con->conid, con->usr_data); - - /* We shouldn't flush pending works as we may be in the - * thread. In fact the races with pending rx/tx work structs - * are harmless for us here as we have already deleted this - * connection from server connection list. - */ - if (con->sock) - kernel_sock_shutdown(con->sock, SHUT_RDWR); - conn_put(con); } + write_unlock_bh(&sk->sk_callback_lock); + + /* Handle concurrent calls from sending and receiving threads */ + if (!disconnect) + return; + + /* Don't flush pending works, -just let them expire */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + conn_put(con); } static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) @@ -248,9 +242,10 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) static int tipc_receive_from_sock(struct tipc_conn *con) { - struct msghdr msg = {}; struct tipc_server *s = con->server; + struct sock *sk = con->sock->sk; struct sockaddr_tipc addr; + struct msghdr msg = {}; struct kvec iov; void *buf; int ret; @@ -271,12 +266,15 @@ static int tipc_receive_from_sock(struct tipc_conn *con) goto out_close; } - s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr, - con->usr_data, buf, ret); - + read_lock_bh(&sk->sk_callback_lock); + if (test_bit(CF_CONNECTED, &con->flags)) + ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, + &addr, con->usr_data, buf, ret); + read_unlock_bh(&sk->sk_callback_lock); kmem_cache_free(s->rcvbuf_cache, buf); - - return 0; + if (ret < 0) + tipc_conn_terminate(s, con->conid); + return ret; out_close: if (ret != -EWOULDBLOCK) @@ -489,8 +487,8 @@ void tipc_conn_terminate(struct tipc_server *s, int conid) } } -bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, - u32 lower, u32 upper, int *conid) +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) { struct tipc_subscriber *scbr; struct tipc_subscr sub; @@ -501,7 +499,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, sub.seq.lower = lower; sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; - sub.filter = TIPC_SUB_PORTS; + sub.filter = filter; *(u32 *)&sub.usr_handle = port; con = tipc_alloc_conn(tipc_topsrv(net)); @@ -525,11 +523,17 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, void tipc_topsrv_kern_unsubscr(struct net *net, int conid) { struct tipc_conn *con; + struct tipc_server *srv; con = tipc_conn_lookup(tipc_topsrv(net), conid); if (!con) return; - tipc_close_conn(con); + + test_and_clear_bit(CF_CONNECTED, &con->flags); + srv = con->server; + if (con->conid) + srv->tipc_conn_release(con->conid, con->usr_data); + conn_put(con); conn_put(con); } diff --git a/net/tipc/server.h b/net/tipc/server.h index 2113c9192633..64df7513cd70 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -41,6 +41,9 @@ #include <net/net_namespace.h> #define TIPC_SERVER_NAME_LEN 32 +#define TIPC_SUB_CLUSTER_SCOPE 0x20 +#define TIPC_SUB_NODE_SCOPE 0x40 +#define TIPC_SUB_NO_STATUS 0x80 /** * struct tipc_server - TIPC server structure @@ -71,9 +74,9 @@ struct tipc_server { int max_rcvbuf_size; void *(*tipc_conn_new)(int conid); void (*tipc_conn_release)(int conid, void *usr_data); - void (*tipc_conn_recvmsg)(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len); + int (*tipc_conn_recvmsg)(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len); struct sockaddr_tipc *saddr; char name[TIPC_SERVER_NAME_LEN]; int imp; @@ -83,8 +86,8 @@ struct tipc_server { int tipc_conn_sendmsg(struct tipc_server *s, int conid, struct sockaddr_tipc *addr, void *data, size_t len); -bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, - u32 lower, u32 upper, int *conid); +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid); void tipc_topsrv_kern_unsubscr(struct net *net, int conid); /** diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 2aa46e8cd8fe..163f3a547501 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -116,6 +116,7 @@ struct tipc_sock { struct tipc_mc_method mc_method; struct rcu_head rcu; struct tipc_group *group; + bool group_is_open; }; static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -715,7 +716,6 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_group *grp = tsk->group; __poll_t revents = 0; sock_poll_wait(file, sk_sleep(sk), wait); @@ -736,9 +736,8 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: - if (!grp || tipc_group_size(grp)) - if (!tsk->cong_link_cnt) - revents |= POLLOUT; + if (tsk->group_is_open && !tsk->cong_link_cnt) + revents |= POLLOUT; if (!tipc_sk_type_connectionless(sk)) break; if (skb_queue_empty(&sk->sk_receive_queue)) @@ -772,7 +771,6 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct net *net = sock_net(sk); int mtu = tipc_bcast_get_mtu(net); struct tipc_mc_method *method = &tsk->mc_method; - u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE); struct sk_buff_head pkts; struct tipc_nlist dsts; int rc; @@ -788,7 +786,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, /* Lookup destination nodes */ tipc_nlist_init(&dsts, tipc_own_addr(net)); tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, - seq->upper, domain, &dsts); + seq->upper, &dsts); if (!dsts.local && !dsts.remote) return -EHOSTUNREACH; @@ -928,21 +926,22 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, struct list_head *cong_links = &tsk->cong_links; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct tipc_member *first = NULL; struct tipc_member *mbr = NULL; struct net *net = sock_net(sk); u32 node, port, exclude; - u32 type, inst, domain; struct list_head dsts; + u32 type, inst, scope; int lookups = 0; int dstcnt, rc; bool cong; INIT_LIST_HEAD(&dsts); - type = dest->addr.name.name.type; + type = msg_nametype(hdr); inst = dest->addr.name.name.instance; - domain = addr_domain(net, dest->scope); + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); while (++lookups < 4) { @@ -950,7 +949,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /* Look for a non-congested destination member, if any */ while (1) { - if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts, + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, &dstcnt, exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); @@ -1079,22 +1078,23 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - struct tipc_name_seq *seq = &dest->addr.nameseq; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - u32 domain, exclude, dstcnt; + u32 type, inst, scope, exclude; struct list_head dsts; + u32 dstcnt; INIT_LIST_HEAD(&dsts); - if (seq->lower != seq->upper) - return -ENOTSUPP; - - domain = addr_domain(net, dest->scope); + type = msg_nametype(hdr); + inst = dest->addr.name.name.instance; + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); - if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain, - &dsts, &dstcnt, exclude, true)) + + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, + &dstcnt, exclude, true)) return -EHOSTUNREACH; if (dstcnt == 1) { @@ -1116,24 +1116,29 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { - u32 scope = TIPC_CLUSTER_SCOPE; u32 self = tipc_own_addr(net); + u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; - u32 lower = 0, upper = ~0; - struct sk_buff_head tmpq; u32 portid, oport, onode; + struct sk_buff_head tmpq; struct list_head dports; - struct tipc_msg *msg; - int user, mtyp, hsz; + struct tipc_msg *hdr; + int user, mtyp, hlen; + bool exact; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { - msg = buf_msg(skb); - user = msg_user(msg); - mtyp = msg_type(msg); + hdr = buf_msg(skb); + user = msg_user(hdr); + mtyp = msg_type(hdr); + hlen = skb_headroom(skb) + msg_hdr_sz(hdr); + oport = msg_origport(hdr); + onode = msg_orignode(hdr); + type = msg_nametype(hdr); + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { @@ -1144,21 +1149,31 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, spin_unlock_bh(&inputq->lock); continue; } - hsz = skb_headroom(skb) + msg_hdr_sz(msg); - oport = msg_origport(msg); - onode = msg_orignode(msg); - if (onode == self) - scope = TIPC_NODE_SCOPE; - - /* Create destination port list and message clones: */ - if (!msg_in_group(msg)) { - lower = msg_namelower(msg); - upper = msg_nameupper(msg); + + /* Group messages require exact scope match */ + if (msg_in_group(hdr)) { + lower = 0; + upper = ~0; + scope = msg_lookup_scope(hdr); + exact = true; + } else { + /* TIPC_NODE_SCOPE means "any scope" in this context */ + if (onode == self) + scope = TIPC_NODE_SCOPE; + else + scope = TIPC_CLUSTER_SCOPE; + exact = false; + lower = msg_namelower(hdr); + upper = msg_nameupper(hdr); } - tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper, - scope, &dports); + + /* Create destination port list: */ + tipc_nametbl_mc_lookup(net, type, lower, upper, + scope, exact, &dports); + + /* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { - _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + _skb = __pskb_copy(skb, hlen, GFP_ATOMIC); if (_skb) { msg_set_destport(buf_msg(_skb), portid); __skb_queue_tail(&tmpq, _skb); @@ -1933,8 +1948,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, break; case TOP_SRV: tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, - skb, inputq, xmitq); - skb = NULL; + hdr, inputq, xmitq); break; default: break; @@ -2640,9 +2654,7 @@ void tipc_sk_reinit(struct net *net) rhashtable_walk_enter(&tn->sk_rht, &iter); do { - tsk = ERR_PTR(rhashtable_walk_start(&iter)); - if (IS_ERR(tsk)) - goto walk_stop; + rhashtable_walk_start(&iter); while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2651,7 +2663,7 @@ void tipc_sk_reinit(struct net *net) msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } -walk_stop: + rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); } @@ -2734,7 +2746,6 @@ void tipc_sk_rht_destroy(struct net *net) static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) { struct net *net = sock_net(&tsk->sk); - u32 domain = addr_domain(net, mreq->scope); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq seq; @@ -2742,9 +2753,11 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; + if (mreq->scope > TIPC_NODE_SCOPE) + return -EINVAL; if (grp) return -EACCES; - grp = tipc_group_create(net, tsk->portid, mreq); + grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); if (!grp) return -ENOMEM; tsk->group = grp; @@ -2754,16 +2767,17 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) seq.type = mreq->type; seq.lower = mreq->instance; seq.upper = seq.lower; - tipc_nametbl_build_group(net, grp, mreq->type, domain); + tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); rc = tipc_sk_publish(tsk, mreq->scope, &seq); if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; + return rc; } - - /* Eliminate any risk that a broadcast overtakes the sent JOIN */ + /* Eliminate any risk that a broadcast overtakes sent JOINs */ tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; + tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf); return rc; } diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 251065dfd8df..68e26470c516 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -118,15 +118,19 @@ void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, - u32 node, int must) + u32 node, u32 scope, int must) { + u32 filter = htohl(sub->evt.s.filter, sub->swap); struct tipc_name_seq seq; tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; - if (!must && - !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS)) + if (!must && !(filter & TIPC_SUB_PORTS)) + return; + if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) + return; + if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) return; tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, @@ -285,21 +289,21 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net, return sub; } -static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, - struct tipc_subscriber *subscriber, int swap) +static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, + struct tipc_subscriber *subscriber, int swap, + bool status) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub = NULL; u32 timeout; sub = tipc_subscrp_create(net, s, swap); if (!sub) - return tipc_conn_terminate(tn->topsrv, subscriber->conid); + return -1; spin_lock_bh(&subscriber->lock); list_add(&sub->subscrp_list, &subscriber->subscrp_list); sub->subscriber = subscriber; - tipc_nametbl_subscribe(sub); + tipc_nametbl_subscribe(sub, status); tipc_subscrb_get(subscriber); spin_unlock_bh(&subscriber->lock); @@ -308,6 +312,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, if (timeout != TIPC_WAIT_FOREVER) mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); + return 0; } /* Handle one termination request for the subscriber */ @@ -317,12 +322,13 @@ static void tipc_subscrb_release_cb(int conid, void *usr_data) } /* Handle one request to create a new subscription for the subscriber */ -static void tipc_subscrb_rcv_cb(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len) +static int tipc_subscrb_rcv_cb(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscr *s = (struct tipc_subscr *)buf; + bool status; int swap; /* Determine subscriber's endianness */ @@ -332,10 +338,11 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, /* Detect & process a subscription cancellation request */ if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); - return tipc_subscrp_cancel(s, subscriber); + tipc_subscrp_cancel(s, subscriber); + return 0; } - - tipc_subscrp_subscribe(net, s, subscriber, swap); + status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap)); + return tipc_subscrp_subscribe(net, s, subscriber, swap, status); } /* Handle one request to establish a new subscriber */ diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ee52957dc952..f3edca775d9f 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -71,7 +71,7 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, u32 found_upper); void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, - u32 port_ref, u32 node, int must); + u32 port_ref, u32 node, u32 scope, int must); void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, struct tipc_name_seq *out); u32 tipc_subscrp_convert_seq_type(u32 type, int swap); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0a9b72fbd761..f26376e954ae 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -214,7 +214,11 @@ static int tls_do_encryption(struct tls_context *tls_ctx, aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out, data_len, tls_ctx->iv); - rc = crypto_aead_encrypt(aead_req); + + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &ctx->async_wait); + + rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait); ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size; ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size; @@ -665,6 +669,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) goto out; } + crypto_init_wait(&sw_ctx->async_wait); + ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; crypto_info = &ctx->crypto_send; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6b7678df41e5..0214acbd6bff 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2870,7 +2870,6 @@ static int unix_seq_open(struct inode *inode, struct file *file) } static const struct file_operations unix_seq_fops = { - .owner = THIS_MODULE, .open = unix_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 413d4f4e6334..a1d10993d08a 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -126,6 +126,11 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, wdev->ibss_fixed = params->channel_fixed; wdev->ibss_dfs_possible = params->userspace_handles_dfs; wdev->chandef = params->chandef; + if (connkeys) { + params->wep_keys = connkeys->params; + params->wep_tx_key = connkeys->def; + } + #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.chandef = params->chandef; #endif diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index e7c64a8dce54..bbb9907bfa86 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -692,7 +692,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return rdev_mgmt_tx(rdev, wdev, params, cookie); } -bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, +bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm, const u8 *buf, size_t len, u32 flags) { struct wiphy *wiphy = wdev->wiphy; @@ -708,7 +708,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE); u16 stype; - trace_cfg80211_rx_mgmt(wdev, freq, sig_mbm); + trace_cfg80211_rx_mgmt(wdev, freq, sig_dbm); stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4; if (!(stypes->rx & BIT(stype))) { @@ -735,7 +735,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, /* Indicate the received Action frame to user space */ if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, - freq, sig_mbm, + freq, sig_dbm, buf, len, flags, GFP_ATOMIC)) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 542a4fc0a8d7..ab0c687d0c44 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -734,11 +734,12 @@ struct key_parse { bool def_uni, def_multi; }; -static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) +static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key, + struct key_parse *k) { struct nlattr *tb[NL80211_KEY_MAX + 1]; int err = nla_parse_nested(tb, NL80211_KEY_MAX, key, - nl80211_key_policy, NULL); + nl80211_key_policy, info->extack); if (err) return err; @@ -771,7 +772,8 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) if (tb[NL80211_KEY_TYPE]) { k->type = nla_get_u32(tb[NL80211_KEY_TYPE]); if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) - return -EINVAL; + return genl_err_attr(info, -EINVAL, + tb[NL80211_KEY_TYPE]); } if (tb[NL80211_KEY_DEFAULT_TYPES]) { @@ -779,7 +781,8 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, tb[NL80211_KEY_DEFAULT_TYPES], - nl80211_key_default_policy, NULL); + nl80211_key_default_policy, + info->extack); if (err) return err; @@ -820,8 +823,10 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) if (info->attrs[NL80211_ATTR_KEY_TYPE]) { k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]); - if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) + if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) { + GENL_SET_ERR_MSG(info, "key type out of range"); return -EINVAL; + } } if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) { @@ -850,31 +855,42 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) k->type = -1; if (info->attrs[NL80211_ATTR_KEY]) - err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k); + err = nl80211_parse_key_new(info, info->attrs[NL80211_ATTR_KEY], k); else err = nl80211_parse_key_old(info, k); if (err) return err; - if (k->def && k->defmgmt) + if (k->def && k->defmgmt) { + GENL_SET_ERR_MSG(info, "key with def && defmgmt is invalid"); return -EINVAL; + } if (k->defmgmt) { - if (k->def_uni || !k->def_multi) + if (k->def_uni || !k->def_multi) { + GENL_SET_ERR_MSG(info, "defmgmt key must be mcast"); return -EINVAL; + } } if (k->idx != -1) { if (k->defmgmt) { - if (k->idx < 4 || k->idx > 5) + if (k->idx < 4 || k->idx > 5) { + GENL_SET_ERR_MSG(info, + "defmgmt key idx not 4 or 5"); return -EINVAL; + } } else if (k->def) { - if (k->idx < 0 || k->idx > 3) + if (k->idx < 0 || k->idx > 3) { + GENL_SET_ERR_MSG(info, "def key idx not 0-3"); return -EINVAL; + } } else { - if (k->idx < 0 || k->idx > 5) + if (k->idx < 0 || k->idx > 5) { + GENL_SET_ERR_MSG(info, "key idx not 0-5"); return -EINVAL; + } } } @@ -883,8 +899,9 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) static struct cfg80211_cached_keys * nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, - struct nlattr *keys, bool *no_ht) + struct genl_info *info, bool *no_ht) { + struct nlattr *keys = info->attrs[NL80211_ATTR_KEYS]; struct key_parse parse; struct nlattr *key; struct cfg80211_cached_keys *result; @@ -909,17 +926,22 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, memset(&parse, 0, sizeof(parse)); parse.idx = -1; - err = nl80211_parse_key_new(key, &parse); + err = nl80211_parse_key_new(info, key, &parse); if (err) goto error; err = -EINVAL; if (!parse.p.key) goto error; - if (parse.idx < 0 || parse.idx > 3) + if (parse.idx < 0 || parse.idx > 3) { + GENL_SET_ERR_MSG(info, "key index out of range [0-3]"); goto error; + } if (parse.def) { - if (def) + if (def) { + GENL_SET_ERR_MSG(info, + "only one key can be default"); goto error; + } def = 1; result->def = parse.idx; if (!parse.def_uni || !parse.def_multi) @@ -932,6 +954,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, goto error; if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 && parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) { + GENL_SET_ERR_MSG(info, "connect key must be WEP"); err = -EINVAL; goto error; } @@ -947,6 +970,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, if (result->def < 0) { err = -EINVAL; + GENL_SET_ERR_MSG(info, "need a default/TX key"); goto error; } @@ -7820,6 +7844,11 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, intbss->ts_boottime, NL80211_BSS_PAD)) goto nla_put_failure; + if (!nl80211_put_signal(msg, intbss->pub.chains, + intbss->pub.chain_signal, + NL80211_BSS_CHAIN_SIGNAL)) + goto nla_put_failure; + switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal)) @@ -8616,9 +8645,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { bool no_ht = false; - connkeys = nl80211_parse_connkeys(rdev, - info->attrs[NL80211_ATTR_KEYS], - &no_ht); + connkeys = nl80211_parse_connkeys(rdev, info, &no_ht); if (IS_ERR(connkeys)) return PTR_ERR(connkeys); @@ -9022,8 +9049,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) } if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) { - connkeys = nl80211_parse_connkeys(rdev, - info->attrs[NL80211_ATTR_KEYS], NULL); + connkeys = nl80211_parse_connkeys(rdev, info, NULL); if (IS_ERR(connkeys)) return PTR_ERR(connkeys); } @@ -13948,7 +13974,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - (from_ap && reason && + (reason && nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason)) || (from_ap && nla_put_flag(msg, NL80211_ATTR_DISCONNECTED_BY_AP)) || diff --git a/net/wireless/scan.c b/net/wireless/scan.c index f6c5fe482506..d36c3eb7b931 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -981,6 +981,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, found->ts = tmp->ts; found->ts_boottime = tmp->ts_boottime; found->parent_tsf = tmp->parent_tsf; + found->pub.chains = tmp->pub.chains; + memcpy(found->pub.chain_signal, tmp->pub.chain_signal, + IEEE80211_MAX_CHAINS); ether_addr_copy(found->parent_bssid, tmp->parent_bssid); } else { struct cfg80211_internal_bss *new; @@ -1233,6 +1236,8 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); tmp.ts_boottime = data->boottime_ns; tmp.parent_tsf = data->parent_tsf; + tmp.pub.chains = data->chains; + memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS); ether_addr_copy(tmp.parent_bssid, data->parent_bssid); signal_valid = abs(data->chan->center_freq - channel->center_freq) <= diff --git a/net/wireless/trace.h b/net/wireless/trace.h index f3353fe5b35b..bcfedd39e7a3 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2544,20 +2544,20 @@ DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_del_sta, ); TRACE_EVENT(cfg80211_rx_mgmt, - TP_PROTO(struct wireless_dev *wdev, int freq, int sig_mbm), - TP_ARGS(wdev, freq, sig_mbm), + TP_PROTO(struct wireless_dev *wdev, int freq, int sig_dbm), + TP_ARGS(wdev, freq, sig_dbm), TP_STRUCT__entry( WDEV_ENTRY __field(int, freq) - __field(int, sig_mbm) + __field(int, sig_dbm) ), TP_fast_assign( WDEV_ASSIGN; __entry->freq = freq; - __entry->sig_mbm = sig_mbm; + __entry->sig_dbm = sig_dbm; ), - TP_printk(WDEV_PR_FMT ", freq: %d, sig mbm: %d", - WDEV_PR_ARG, __entry->freq, __entry->sig_mbm) + TP_printk(WDEV_PR_FMT ", freq: %d, sig dbm: %d", + WDEV_PR_ARG, __entry->freq, __entry->sig_dbm) ); TRACE_EVENT(cfg80211_mgmt_tx_status, diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6cdb054484d6..9efbfc753347 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -1035,18 +1035,23 @@ static int ioctl_standard_call(struct net_device * dev, } -int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, - void __user *arg) +int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct iw_request_info info = { .cmd = cmd, .flags = 0 }; + struct iwreq iwr; int ret; - ret = wext_ioctl_dispatch(net, iwr, cmd, &info, + if (copy_from_user(&iwr, arg, sizeof(iwr))) + return -EFAULT; + + iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0; + + ret = wext_ioctl_dispatch(net, &iwr, cmd, &info, ioctl_standard_call, ioctl_private_call); if (ret >= 0 && IW_IS_GET(cmd) && - copy_to_user(arg, iwr, sizeof(struct iwreq))) + copy_to_user(arg, &iwr, sizeof(struct iwreq))) return -EFAULT; return ret; diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c index e98a01c1034f..5511f989ef47 100644 --- a/net/wireless/wext-proc.c +++ b/net/wireless/wext-proc.c @@ -133,7 +133,6 @@ static int seq_open_wireless(struct inode *inode, struct file *file) } static const struct file_operations wireless_seq_fops = { - .owner = THIS_MODULE, .open = seq_open_wireless, .read = seq_read, .llseek = seq_lseek, diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index ac9477189d1c..8e70291e586a 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -23,32 +23,114 @@ #include <linux/notifier.h> #ifdef CONFIG_XFRM_OFFLOAD -int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) +struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; + unsigned long flags; struct xfrm_state *x; + struct sk_buff *skb2; + struct softnet_data *sd; + netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); - if (skb_is_gso(skb)) - return 0; + if (!xo) + return skb; - if (xo) { - x = skb->sp->xvec[skb->sp->len - 1]; - if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND) - return 0; + if (!(features & NETIF_F_HW_ESP)) + esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + + x = skb->sp->xvec[skb->sp->len - 1]; + if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND) + return skb; + + local_irq_save(flags); + sd = this_cpu_ptr(&softnet_data); + err = !skb_queue_empty(&sd->xfrm_backlog); + local_irq_restore(flags); + + if (err) { + *again = true; + return skb; + } + + if (skb_is_gso(skb)) { + struct net_device *dev = skb->dev; + + if (unlikely(!x->xso.offload_handle || (x->xso.dev != dev))) { + struct sk_buff *segs; + + /* Packet got rerouted, fixup features and segment it. */ + esp_features = esp_features & ~(NETIF_F_HW_ESP + | NETIF_F_GSO_ESP); + + segs = skb_gso_segment(skb, esp_features); + if (IS_ERR(segs)) { + kfree_skb(skb); + atomic_long_inc(&dev->tx_dropped); + return NULL; + } else { + consume_skb(skb); + skb = segs; + } + } + } + if (!skb->next) { x->outer_mode->xmit(x, skb); - err = x->type_offload->xmit(x, skb, features); + xo->flags |= XFRM_DEV_RESUME; + + err = x->type_offload->xmit(x, skb, esp_features); if (err) { + if (err == -EINPROGRESS) + return NULL; + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); - return err; + kfree_skb(skb); + return NULL; } skb_push(skb, skb->data - skb_mac_header(skb)); + + return skb; } - return 0; + skb2 = skb; + + do { + struct sk_buff *nskb = skb2->next; + skb2->next = NULL; + + xo = xfrm_offload(skb2); + xo->flags |= XFRM_DEV_RESUME; + + x->outer_mode->xmit(x, skb2); + + err = x->type_offload->xmit(x, skb2, esp_features); + if (!err) { + skb2->next = nskb; + } else if (err != -EINPROGRESS) { + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); + skb2->next = nskb; + kfree_skb_list(skb2); + return NULL; + } else { + if (skb == skb2) + skb = nskb; + + if (!skb) + return NULL; + + goto skip_push; + } + + skb_push(skb2, skb2->data - skb_mac_header(skb2)); + +skip_push: + skb2 = nskb; + } while (skb2); + + return skb; } EXPORT_SYMBOL_GPL(validate_xmit_xfrm); @@ -65,9 +147,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (!x->type_offload) return -EINVAL; - /* We don't yet support UDP encapsulation, TFC padding and ESN. */ - if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN)) - return 0; + /* We don't yet support UDP encapsulation and TFC padding. */ + if (x->encap || x->tfcpad) + return -EINVAL; dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { @@ -96,6 +178,13 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, return 0; } + if (x->props.flags & XFRM_STATE_ESN && + !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { + xso->dev = NULL; + dev_put(dev); + return -EINVAL; + } + xso->dev = dev; xso->num_exthdrs = 1; xso->flags = xuo->flags; @@ -121,8 +210,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) if (!x->type_offload || x->encap) return false; - if ((x->xso.offload_handle && (dev == dst->path->dev)) && - !dst->child->xfrm && x->type->get_mtu) { + if ((!dev || (x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev))) && + (!xdst->child->xfrm && x->type->get_mtu)) { mtu = x->type->get_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) @@ -141,19 +230,82 @@ ok: return true; } EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); + +void xfrm_dev_resume(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + int ret = NETDEV_TX_BUSY; + struct netdev_queue *txq; + struct softnet_data *sd; + unsigned long flags; + + rcu_read_lock(); + txq = netdev_pick_tx(dev, skb, NULL); + + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (!netif_xmit_frozen_or_stopped(txq)) + skb = dev_hard_start_xmit(skb, dev, txq, &ret); + HARD_TX_UNLOCK(dev, txq); + + if (!dev_xmit_complete(ret)) { + local_irq_save(flags); + sd = this_cpu_ptr(&softnet_data); + skb_queue_tail(&sd->xfrm_backlog, skb); + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(xfrm_dev_resume); + +void xfrm_dev_backlog(struct softnet_data *sd) +{ + struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog; + struct sk_buff_head list; + struct sk_buff *skb; + + if (skb_queue_empty(xfrm_backlog)) + return; + + __skb_queue_head_init(&list); + + spin_lock(&xfrm_backlog->lock); + skb_queue_splice_init(xfrm_backlog, &list); + spin_unlock(&xfrm_backlog->lock); + + while (!skb_queue_empty(&list)) { + skb = __skb_dequeue(&list); + xfrm_dev_resume(skb); + } + +} #endif -static int xfrm_dev_register(struct net_device *dev) +static int xfrm_api_check(struct net_device *dev) { - if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) - return NOTIFY_BAD; +#ifdef CONFIG_XFRM_OFFLOAD if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && !(dev->features & NETIF_F_HW_ESP)) return NOTIFY_BAD; + if ((dev->features & NETIF_F_HW_ESP) && + (!(dev->xfrmdev_ops && + dev->xfrmdev_ops->xdo_dev_state_add && + dev->xfrmdev_ops->xdo_dev_state_delete))) + return NOTIFY_BAD; +#else + if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM)) + return NOTIFY_BAD; +#endif + return NOTIFY_DONE; } +static int xfrm_dev_register(struct net_device *dev) +{ + return xfrm_api_check(dev); +} + static int xfrm_dev_unregister(struct net_device *dev) { xfrm_policy_cache_flush(); @@ -162,16 +314,7 @@ static int xfrm_dev_unregister(struct net_device *dev) static int xfrm_dev_feat_change(struct net_device *dev) { - if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) - return NOTIFY_BAD; - else if (!(dev->features & NETIF_F_HW_ESP)) - dev->xfrmdev_ops = NULL; - - if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && - !(dev->features & NETIF_F_HW_ESP)) - return NOTIFY_BAD; - - return NOTIFY_DONE; + return xfrm_api_check(dev); } static int xfrm_dev_down(struct net_device *dev) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 5b2409746ae0..1472c0857975 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -257,7 +257,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (xo && (xo->flags & CRYPTO_DONE)) { crypto_done = true; - x = xfrm_input_state(skb); family = XFRM_SPI_SKB_CB(skb)->family; if (!(xo->status & CRYPTO_SUCCESS)) { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 73ad8c8ef344..23468672a767 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -44,7 +44,7 @@ static int xfrm_skb_check_space(struct sk_buff *skb) static struct dst_entry *skb_dst_pop(struct sk_buff *skb) { - struct dst_entry *child = dst_clone(skb_dst(skb)->child); + struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb))); skb_dst_drop(skb); return child; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index bd6b0e7a0ee4..7a23078132cf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -54,7 +54,7 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] static struct kmem_cache *xfrm_dst_cache __read_mostly; static __read_mostly seqcount_t xfrm_policy_hash_generation; -static void xfrm_init_pmtu(struct dst_entry *dst); +static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); static void xfrm_policy_queue_process(struct timer_list *t); @@ -1256,7 +1256,7 @@ EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { - struct net *net = xp_net(pol); + struct net *net = sock_net(sk); struct xfrm_policy *old_pol; #ifdef CONFIG_XFRM_SUB_POLICY @@ -1543,7 +1543,9 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, */ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, - struct xfrm_state **xfrm, int nx, + struct xfrm_state **xfrm, + struct xfrm_dst **bundle, + int nx, const struct flowi *fl, struct dst_entry *dst) { @@ -1551,8 +1553,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, unsigned long now = jiffies; struct net_device *dev; struct xfrm_mode *inner_mode; - struct dst_entry *dst_prev = NULL; - struct dst_entry *dst0 = NULL; + struct xfrm_dst *xdst_prev = NULL; + struct xfrm_dst *xdst0 = NULL; int i = 0; int err; int header_len = 0; @@ -1578,13 +1580,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } - if (!dst_prev) - dst0 = dst1; + bundle[i] = xdst; + if (!xdst_prev) + xdst0 = xdst; else /* Ref count is taken during xfrm_alloc_dst() * No need to do dst_clone() on dst1 */ - dst_prev->child = dst1; + xfrm_dst_set_child(xdst_prev, &xdst->u.dst); if (xfrm[i]->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(xfrm[i], @@ -1621,8 +1624,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->input = dst_discard; dst1->output = inner_mode->afinfo->output; - dst1->next = dst_prev; - dst_prev = dst1; + xdst_prev = xdst; header_len += xfrm[i]->props.header_len; if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) @@ -1630,40 +1632,39 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, trailer_len += xfrm[i]->props.trailer_len; } - dst_prev->child = dst; - dst0->path = dst; + xfrm_dst_set_child(xdst_prev, dst); + xdst0->path = dst; err = -ENODEV; dev = dst->dev; if (!dev) goto free_dst; - xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); - xfrm_init_pmtu(dst_prev); - - for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { - struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; + xfrm_init_path(xdst0, dst, nfheader_len); + xfrm_init_pmtu(bundle, nx); - err = xfrm_fill_dst(xdst, dev, fl); + for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst; + xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) { + err = xfrm_fill_dst(xdst_prev, dev, fl); if (err) goto free_dst; - dst_prev->header_len = header_len; - dst_prev->trailer_len = trailer_len; - header_len -= xdst->u.dst.xfrm->props.header_len; - trailer_len -= xdst->u.dst.xfrm->props.trailer_len; + xdst_prev->u.dst.header_len = header_len; + xdst_prev->u.dst.trailer_len = trailer_len; + header_len -= xdst_prev->u.dst.xfrm->props.header_len; + trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len; } out: - return dst0; + return &xdst0->u.dst; put_states: for (; i < nx; i++) xfrm_state_put(xfrm[i]); free_dst: - if (dst0) - dst_release_immediate(dst0); - dst0 = ERR_PTR(err); + if (xdst0) + dst_release_immediate(&xdst0->u.dst); + xdst0 = ERR_PTR(err); goto out; } @@ -1807,7 +1808,7 @@ static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst, for (i = 0; i < num; i++) { if (!dst || dst->xfrm != xfrm[i]) return false; - dst = dst->child; + dst = xfrm_dst_child(dst); } return xfrm_bundle_ok(xdst); @@ -1820,6 +1821,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct xfrm_dst *bundle[XFRM_MAX_DEPTH]; struct xfrm_dst *xdst, *old; struct dst_entry *dst; int err; @@ -1848,7 +1850,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, old = xdst; - dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); + dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig); if (IS_ERR(dst)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); return ERR_CAST(dst); @@ -1888,8 +1890,8 @@ static void xfrm_policy_queue_process(struct timer_list *t) xfrm_decode_session(skb, &fl, dst->ops->family); spin_unlock(&pq->hold_queue.lock); - dst_hold(dst->path); - dst = xfrm_lookup(net, dst->path, &fl, sk, 0); + dst_hold(xfrm_dst_path(dst)); + dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0); if (IS_ERR(dst)) goto purge_queue; @@ -1918,8 +1920,8 @@ static void xfrm_policy_queue_process(struct timer_list *t) skb = __skb_dequeue(&list); xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); - dst_hold(skb_dst(skb)->path); - dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0); + dst_hold(xfrm_dst_path(skb_dst(skb))); + dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0); if (IS_ERR(dst)) { kfree_skb(skb); continue; @@ -2020,8 +2022,8 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, dst1->output = xdst_queue_output; dst_hold(dst); - dst1->child = dst; - dst1->path = dst; + xfrm_dst_set_child(xdst, dst); + xdst->path = dst; xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); @@ -2590,7 +2592,7 @@ static int stale_bundle(struct dst_entry *dst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { - while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { + while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) { dst->dev = dev_net(dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); @@ -2614,13 +2616,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -static void xfrm_init_pmtu(struct dst_entry *dst) +static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr) { - do { - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + while (nr--) { + struct xfrm_dst *xdst = bundle[nr]; u32 pmtu, route_mtu_cached; + struct dst_entry *dst; - pmtu = dst_mtu(dst->child); + dst = &xdst->u.dst; + pmtu = dst_mtu(xfrm_dst_child(dst)); xdst->child_mtu_cached = pmtu; pmtu = xfrm_state_mtu(dst->xfrm, pmtu); @@ -2632,7 +2636,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) pmtu = route_mtu_cached; dst_metric_set(dst, RTAX_MTU, pmtu); - } while ((dst = dst->next)); + } } /* Check that the bundle accepts the flow and its components are @@ -2641,19 +2645,20 @@ static void xfrm_init_pmtu(struct dst_entry *dst) static int xfrm_bundle_ok(struct xfrm_dst *first) { + struct xfrm_dst *bundle[XFRM_MAX_DEPTH]; struct dst_entry *dst = &first->u.dst; - struct xfrm_dst *last; + struct xfrm_dst *xdst; + int start_from, nr; u32 mtu; - if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || + if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0; if (dst->flags & DST_XFRM_QUEUE) return 1; - last = NULL; - + start_from = nr = 0; do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -2665,9 +2670,11 @@ static int xfrm_bundle_ok(struct xfrm_dst *first) xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; - mtu = dst_mtu(dst->child); + bundle[nr++] = xdst; + + mtu = dst_mtu(xfrm_dst_child(dst)); if (xdst->child_mtu_cached != mtu) { - last = xdst; + start_from = nr; xdst->child_mtu_cached = mtu; } @@ -2675,30 +2682,30 @@ static int xfrm_bundle_ok(struct xfrm_dst *first) return 0; mtu = dst_mtu(xdst->route); if (xdst->route_mtu_cached != mtu) { - last = xdst; + start_from = nr; xdst->route_mtu_cached = mtu; } - dst = dst->child; + dst = xfrm_dst_child(dst); } while (dst->xfrm); - if (likely(!last)) + if (likely(!start_from)) return 1; - mtu = last->child_mtu_cached; - for (;;) { - dst = &last->u.dst; + xdst = bundle[start_from - 1]; + mtu = xdst->child_mtu_cached; + while (start_from--) { + dst = &xdst->u.dst; mtu = xfrm_state_mtu(dst->xfrm, mtu); - if (mtu > last->route_mtu_cached) - mtu = last->route_mtu_cached; + if (mtu > xdst->route_mtu_cached) + mtu = xdst->route_mtu_cached; dst_metric_set(dst, RTAX_MTU, mtu); - - if (last == first) + if (!start_from) break; - last = (struct xfrm_dst *)last->u.dst.next; - last->child_mtu_cached = mtu; + xdst = bundle[start_from - 1]; + xdst->child_mtu_cached = mtu; } return 1; @@ -2706,22 +2713,20 @@ static int xfrm_bundle_ok(struct xfrm_dst *first) static unsigned int xfrm_default_advmss(const struct dst_entry *dst) { - return dst_metric_advmss(dst->path); + return dst_metric_advmss(xfrm_dst_path(dst)); } static unsigned int xfrm_mtu(const struct dst_entry *dst) { unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - return mtu ? : dst_mtu(dst->path); + return mtu ? : dst_mtu(xfrm_dst_path(dst)); } static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst, const void *daddr) { - const struct dst_entry *path = dst->path; - - for (; dst != path; dst = dst->child) { + while (dst->xfrm) { const struct xfrm_state *xfrm = dst->xfrm; if (xfrm->props.mode == XFRM_MODE_TRANSPORT) @@ -2730,6 +2735,8 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst, daddr = xfrm->coaddr; else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) daddr = &xfrm->id.daddr; + + dst = xfrm_dst_child(dst); } return daddr; } @@ -2738,7 +2745,7 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { - const struct dst_entry *path = dst->path; + const struct dst_entry *path = xfrm_dst_path(dst); if (!skb) daddr = xfrm_get_dst_nexthop(dst, daddr); @@ -2747,7 +2754,7 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) { - const struct dst_entry *path = dst->path; + const struct dst_entry *path = xfrm_dst_path(dst); daddr = xfrm_get_dst_nexthop(dst, daddr); path->ops->confirm_neigh(path, daddr); diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index ba2b539879bc..6d5f85f4e672 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -71,7 +71,6 @@ static int xfrm_statistics_seq_open(struct inode *inode, struct file *file) } static const struct file_operations xfrm_statistics_seq_fops = { - .owner = THIS_MODULE, .open = xfrm_statistics_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 8b23c5bcf8e8..1d38c6acf8af 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -551,6 +551,8 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) bitnr = replay_esn->replay_window - (diff - pos); } + xfrm_dev_state_advance_esn(x); + nr = bitnr >> 5; bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); @@ -666,7 +668,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff if (unlikely(oseq < replay_esn->oseq)) { XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; xo->seq.hi = oseq_hi; - + replay_esn->oseq_hi = oseq_hi; if (replay_esn->oseq_hi == 0) { replay_esn->oseq--; replay_esn->oseq_hi--; @@ -678,7 +680,6 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff } replay_esn->oseq = oseq; - replay_esn->oseq_hi = oseq_hi; if (xfrm_aevent_is_on(net)) x->repl->notify(x, XFRM_REPLAY_UPDATE); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a3785f538018..54e21f19d722 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2056,6 +2056,13 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen struct xfrm_mgr *km; struct xfrm_policy *pol = NULL; + if (!optval && !optlen) { + xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL); + xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL); + __sk_dst_reset(sk); + return 0; + } + if (optlen <= 0 || optlen > PAGE_SIZE) return -EMSGSIZE; |