Diffstat (limited to 'net')
354 files changed, 6427 insertions, 3004 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bd0ed39f65fb..5505ee6ebdbe 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -360,6 +360,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
     struct vlan_dev_priv *vlan;
     bool last = false;
     LIST_HEAD(list);
+    int err;
 
     if (is_vlan_dev(dev)) {
         int err = __vlan_device_event(dev, event);
@@ -489,6 +490,26 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
         vlan_group_for_each_dev(grp, i, vlandev)
             call_netdevice_notifiers(event, vlandev);
         break;
+
+    case NETDEV_CVLAN_FILTER_PUSH_INFO:
+        err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021Q));
+        if (err)
+            return notifier_from_errno(err);
+        break;
+
+    case NETDEV_CVLAN_FILTER_DROP_INFO:
+        vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021Q));
+        break;
+
+    case NETDEV_SVLAN_FILTER_PUSH_INFO:
+        err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021AD));
+        if (err)
+            return notifier_from_errno(err);
+        break;
+
+    case NETDEV_SVLAN_FILTER_DROP_INFO:
+        vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021AD));
+        break;
     }
 
 out:
@@ -729,7 +750,6 @@ static struct pernet_operations vlan_net_ops = {
     .exit = vlan_exit_net,
     .id   = &vlan_net_id,
     .size = sizeof(struct vlan_net),
-    .async = true,
 };
 
 static int __init vlan_proto_init(void)
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index a8ba51030b75..e23aac3e4d37 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -97,6 +97,9 @@ static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
     if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
                          (i) % VLAN_N_VID)))
 
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto);
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto);
+
 /* found in vlan_dev.c */
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
                    u32 skb_prio, u16 vlan_prio);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 64aa9f755e1d..c8d7abdc0463 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -48,8 +48,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
          * original position later
          */
         skb_push(skb, offset);
-        skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
-                          skb->vlan_tci);
+        skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto,
+                            skb->vlan_tci, skb->mac_len);
         if (!skb)
             return false;
         skb_pull(skb, offset + VLAN_HLEN);
@@ -165,13 +165,12 @@ struct vlan_vid_info {
     int refcount;
 };
 
-static bool vlan_hw_filter_capable(const struct net_device *dev,
-                   const struct vlan_vid_info *vid_info)
+bool vlan_hw_filter_capable(const struct net_device *dev, __be16 proto)
 {
-    if (vid_info->proto == htons(ETH_P_8021Q) &&
+    if (proto == htons(ETH_P_8021Q) &&
         dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
         return true;
-    if (vid_info->proto == htons(ETH_P_8021AD) &&
+    if (proto == htons(ETH_P_8021AD) &&
         dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
         return true;
     return false;
@@ -202,11 +201,73 @@ static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
     return vid_info;
 }
 
+static int vlan_add_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+    if (!vlan_hw_filter_capable(dev, proto))
+        return 0;
+
+    if (netif_device_present(dev))
+        return dev->netdev_ops->ndo_vlan_rx_add_vid(dev, proto, vid);
+    else
+        return -ENODEV;
+}
+
+static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+    if (!vlan_hw_filter_capable(dev, proto))
+        return 0;
+
+    if (netif_device_present(dev))
+        return dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
+    else
+        return -ENODEV;
+}
+
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+    struct net_device *real_dev = vlan_info->real_dev;
+    struct vlan_vid_info *vlan_vid_info;
+    int err;
+
+    list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list) {
+        if (vlan_vid_info->proto == proto) {
+            err = vlan_add_rx_filter_info(real_dev, proto,
+                              vlan_vid_info->vid);
+            if (err)
+                goto unwind;
+        }
+    }
+
+    return 0;
+
+unwind:
+    list_for_each_entry_continue_reverse(vlan_vid_info,
+                         &vlan_info->vid_list, list) {
+        if (vlan_vid_info->proto == proto)
+            vlan_kill_rx_filter_info(real_dev, proto,
+                         vlan_vid_info->vid);
+    }
+
+    return err;
+}
+EXPORT_SYMBOL(vlan_filter_push_vids);
+
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+    struct vlan_vid_info *vlan_vid_info;
+
+    list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list)
+        if (vlan_vid_info->proto == proto)
+            vlan_kill_rx_filter_info(vlan_info->real_dev,
+                         vlan_vid_info->proto,
+                         vlan_vid_info->vid);
+}
+EXPORT_SYMBOL(vlan_filter_drop_vids);
+
 static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
               struct vlan_vid_info **pvid_info)
 {
     struct net_device *dev = vlan_info->real_dev;
-    const struct net_device_ops *ops = dev->netdev_ops;
     struct vlan_vid_info *vid_info;
     int err;
 
@@ -214,16 +275,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
     if (!vid_info)
         return -ENOMEM;
 
-    if (vlan_hw_filter_capable(dev, vid_info)) {
-        if (netif_device_present(dev))
-            err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
-        else
-            err = -ENODEV;
-        if (err) {
-            kfree(vid_info);
-            return err;
-        }
+    err = vlan_add_rx_filter_info(dev, proto, vid);
+    if (err) {
+        kfree(vid_info);
+        return err;
     }
+
     list_add(&vid_info->list, &vlan_info->vid_list);
     vlan_info->nr_vids++;
     *pvid_info = vid_info;
@@ -270,21 +327,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
                struct vlan_vid_info *vid_info)
 {
     struct net_device *dev = vlan_info->real_dev;
-    const struct net_device_ops *ops = dev->netdev_ops;
     __be16 proto = vid_info->proto;
     u16 vid = vid_info->vid;
     int err;
 
-    if (vlan_hw_filter_capable(dev, vid_info)) {
-        if (netif_device_present(dev))
-            err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
-        else
-            err = -ENODEV;
-        if (err) {
-            pr_warn("failed to kill vid %04x/%d for device %s\n",
-                proto, vid, dev->name);
-        }
-    }
+    err = vlan_kill_rx_filter_info(dev, proto, vid);
+    if (err)
+        pr_warn("failed to kill vid %04x/%d for device %s\n",
+            proto, vid, dev->name);
+
     list_del(&vid_info->list);
     kfree(vid_info);
     vlan_info->nr_vids--;
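The new vlan_filter_push_vids() above uses the common kernel unwind idiom: on a mid-list failure, list_for_each_entry_continue_reverse() walks back over exactly the entries that were already applied. A minimal user-space C sketch of the same pattern follows; all names in it (apply, undo, push_all) are illustrative, not kernel API.

/* Unwind-on-failure sketch: apply a callback to each entry and, on
 * error, roll back only the entries that were already applied.
 */
#include <stdio.h>

static int apply(int id) { printf("add %d\n", id); return id == 3 ? -1 : 0; }
static void undo(int id) { printf("del %d\n", id); }

static int push_all(const int *ids, int n)
{
    int i, err = 0;

    for (i = 0; i < n; i++) {
        err = apply(ids[i]);
        if (err)
            goto unwind;
    }
    return 0;

unwind:
    while (--i >= 0)    /* walk back over entries already applied */
        undo(ids[i]);
    return err;
}

int main(void)
{
    const int vids[] = { 1, 2, 3, 4 };

    return push_all(vids, 4) ? 1 : 0;
}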
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index a662ccc166df..a627a5db2125 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -148,8 +148,8 @@ int __net_init vlan_proc_init(struct net *net)
     if (!vn->proc_vlan_dir)
         goto err;
 
-    vn->proc_vlan_conf = proc_create(name_conf, S_IFREG|S_IRUSR|S_IWUSR,
-                     vn->proc_vlan_dir, &vlan_fops);
+    vn->proc_vlan_conf = proc_create(name_conf, S_IFREG | 0600,
+                     vn->proc_vlan_dir, &vlan_fops);
     if (!vn->proc_vlan_conf)
         goto err;
     return 0;
@@ -172,7 +172,7 @@ int vlan_proc_add_dev(struct net_device *vlandev)
     if (!strcmp(vlandev->name, name_conf))
         return -EINVAL;
     vlan->dent =
-        proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
+        proc_create_data(vlandev->name, S_IFREG | 0600,
                  vn->proc_vlan_dir, &vlandev_fops, vlandev);
     if (!vlan->dent)
         return -ENOBUFS;
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index a3bf9d519193..7214aea14cb3 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -257,22 +257,22 @@ int __init atalk_proc_init(void)
     if (!atalk_proc_dir)
         goto out;
 
-    p = proc_create("interface", S_IRUGO, atalk_proc_dir,
+    p = proc_create("interface", 0444, atalk_proc_dir,
             &atalk_seq_interface_fops);
     if (!p)
         goto out_interface;
 
-    p = proc_create("route", S_IRUGO, atalk_proc_dir,
+    p = proc_create("route", 0444, atalk_proc_dir,
             &atalk_seq_route_fops);
     if (!p)
         goto out_route;
 
-    p = proc_create("socket", S_IRUGO, atalk_proc_dir,
+    p = proc_create("socket", 0444, atalk_proc_dir,
             &atalk_seq_socket_fops);
     if (!p)
         goto out_socket;
 
-    p = proc_create("arp", S_IRUGO, atalk_proc_dir, &atalk_seq_arp_fops);
+    p = proc_create("arp", 0444, atalk_proc_dir, &atalk_seq_arp_fops);
     if (!p)
         goto out_arp;
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 5d2fed9f5710..39b94ca5f65d 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -96,12 +96,12 @@ static ssize_t show_link_rate(struct device *cdev,
     return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
 }
 
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
-static DEVICE_ATTR(atmindex, S_IRUGO, show_atmindex, NULL);
-static DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
-static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(atmaddress, 0444, show_atmaddress, NULL);
+static DEVICE_ATTR(atmindex, 0444, show_atmindex, NULL);
+static DEVICE_ATTR(carrier, 0444, show_carrier, NULL);
+static DEVICE_ATTR(type, 0444, show_type, NULL);
+static DEVICE_ATTR(link_rate, 0444, show_link_rate, NULL);
 
 static struct device_attribute *atm_attrs[] = {
     &dev_attr_atmaddress,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d4f6029d5109..f07dbc632222 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -893,7 +893,7 @@ static int __init atm_clip_init(void)
 {
     struct proc_dir_entry *p;
 
-    p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops);
+    p = proc_create("arp", 0444, atm_proc_root, &arp_seq_fops);
     if (!p) {
         pr_err("Unable to initialize /proc/net/atm/arp\n");
         atm_clip_exit_noproc();
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09a1f056712a..01d5d20a6eb1 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1042,7 +1042,7 @@ static int __init lane_module_init(void)
 #ifdef CONFIG_PROC_FS
     struct proc_dir_entry *p;
 
-    p = proc_create("lec", S_IRUGO, atm_proc_root, &lec_seq_fops);
+    p = proc_create("lec", 0444, atm_proc_root, &lec_seq_fops);
     if (!p) {
         pr_err("Unable to initialize /proc/net/atm/lec\n");
         return -ENOMEM;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index edc48edc95c1..55410c00c7e2 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -474,7 +474,7 @@ int __init atm_proc_init(void)
     for (e = atm_proc_ents; e->name; e++) {
         struct proc_dir_entry *dirent;
 
-        dirent = proc_create(e->name, S_IRUGO,
+        dirent = proc_create(e->name, 0444,
                      atm_proc_root, e->proc_fops);
         if (!dirent)
             goto err_out_remove;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c8319ed48485..2b41366fcad2 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1989,10 +1989,10 @@ static int __init ax25_init(void)
     dev_add_pack(&ax25_packet_type);
     register_netdevice_notifier(&ax25_dev_notifier);
 
-    proc_create("ax25_route", S_IRUGO, init_net.proc_net,
+    proc_create("ax25_route", 0444, init_net.proc_net,
             &ax25_route_fops);
-    proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops);
-    proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops);
+    proc_create("ax25", 0444, init_net.proc_net, &ax25_info_fops);
+    proc_create("ax25_calls", 0444, init_net.proc_net, &ax25_uid_fops);
 out:
     return rc;
 }
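The symbolic-to-octal permission rewrites in these hunks are identity transformations: S_IRUGO expands to 0444, S_IRUSR|S_IWUSR to 0600, S_IRUGO|S_IWUSR to 0644, and S_IWUSR alone is 0200. The macros come from <linux/stat.h> in the kernel, but the user-space <sys/stat.h> values are the same, so the equivalences can be checked in a standalone C program:

#include <sys/stat.h>
#include <assert.h>

int main(void)
{
    /* S_IRUGO == S_IRUSR | S_IRGRP | S_IROTH in the kernel headers */
    assert((S_IRUSR | S_IRGRP | S_IROTH) == 0444);
    assert((S_IRUSR | S_IWUSR) == 0600);
    assert((S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) == 0644);
    assert(S_IWUSR == 0200);
    return 0;
}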
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index e21aa147607b..be09a9883825 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -157,7 +157,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
  * Return: 0 on success, a negative error code otherwise.
  */
 static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
-                     int max_if_num)
+                     unsigned int max_if_num)
 {
     void *data_ptr;
     size_t old_size;
@@ -201,7 +201,8 @@ unlock:
  */
 static void
 batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
-                   int max_if_num, int del_if_num)
+                   unsigned int max_if_num,
+                   unsigned int del_if_num)
 {
     size_t chunk_size;
     size_t if_offset;
@@ -239,7 +240,8 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
  */
 static void
 batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
-                       int max_if_num, int del_if_num)
+                       unsigned int max_if_num,
+                       unsigned int del_if_num)
 {
     size_t if_offset;
     void *data_ptr;
@@ -276,7 +278,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
  * Return: 0 on success, a negative error code otherwise.
  */
 static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
-                     int max_if_num, int del_if_num)
+                     unsigned int max_if_num,
+                     unsigned int del_if_num)
 {
     spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
@@ -311,7 +314,8 @@ static struct batadv_orig_node *
 batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
 {
     struct batadv_orig_node *orig_node;
-    int size, hash_added;
+    int hash_added;
+    size_t size;
 
     orig_node = batadv_orig_hash_find(bat_priv, addr);
     if (orig_node)
@@ -893,7 +897,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
     u32 i;
     size_t word_index;
     u8 *w;
-    int if_num;
+    unsigned int if_num;
 
     for (i = 0; i < hash->size; i++) {
         head = &hash->table[i];
@@ -1023,7 +1027,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
     struct batadv_neigh_node *tmp_neigh_node = NULL;
     struct batadv_neigh_node *router = NULL;
     struct batadv_orig_node *orig_node_tmp;
-    int if_num;
+    unsigned int if_num;
     u8 sum_orig, sum_neigh;
     u8 *neigh_addr;
     u8 tq_avg;
@@ -1182,7 +1186,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
     u8 total_count;
     u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
     unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
-    int if_num;
+    unsigned int if_num;
     unsigned int tq_asym_penalty, inv_asym_penalty;
     unsigned int combined_tq;
     unsigned int tq_iface_penalty;
@@ -1702,9 +1706,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
 
     if (is_my_orig) {
         unsigned long *word;
-        int offset;
+        size_t offset;
         s32 bit_pos;
-        s16 if_num;
+        unsigned int if_num;
         u8 *weight;
 
         orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
@@ -2729,7 +2733,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
     struct batadv_neigh_ifinfo *router_ifinfo = NULL;
     struct batadv_neigh_node *router;
     struct batadv_gw_node *curr_gw;
-    int ret = -EINVAL;
+    int ret = 0;
     void *hdr;
 
     router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 9c3a34b65b15..ec93337ee259 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -928,7 +928,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
     struct batadv_neigh_ifinfo *router_ifinfo = NULL;
     struct batadv_neigh_node *router;
     struct batadv_gw_node *curr_gw;
-    int ret = -EINVAL;
+    int ret = 0;
     void *hdr;
 
     router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 8ff81346ff0c..a2de5a44bd41 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -2161,22 +2161,25 @@ batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
 {
     struct batadv_bla_claim *claim;
     int idx = 0;
+    int ret = 0;
 
     rcu_read_lock();
     hlist_for_each_entry_rcu(claim, head, hash_entry) {
         if (idx++ < *idx_skip)
             continue;
-        if (batadv_bla_claim_dump_entry(msg, portid, seq,
-                        primary_if, claim)) {
+
+        ret = batadv_bla_claim_dump_entry(msg, portid, seq,
+                          primary_if, claim);
+        if (ret) {
             *idx_skip = idx - 1;
             goto unlock;
         }
     }
 
-    *idx_skip = idx;
+    *idx_skip = 0;
 unlock:
     rcu_read_unlock();
-    return 0;
+    return ret;
 }
 
 /**
@@ -2391,22 +2394,25 @@ batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
 {
     struct batadv_bla_backbone_gw *backbone_gw;
     int idx = 0;
+    int ret = 0;
 
     rcu_read_lock();
     hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
         if (idx++ < *idx_skip)
             continue;
-        if (batadv_bla_backbone_dump_entry(msg, portid, seq,
-                           primary_if, backbone_gw)) {
+
+        ret = batadv_bla_backbone_dump_entry(msg, portid, seq,
+                             primary_if, backbone_gw);
+        if (ret) {
             *idx_skip = idx - 1;
             goto unlock;
         }
     }
 
-    *idx_skip = idx;
+    *idx_skip = 0;
 unlock:
     rcu_read_unlock();
-    return 0;
+    return ret;
 }
 
 /**
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 4469dcc1558f..a60bacf7120b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -33,6 +33,7 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
@@ -43,13 +44,19 @@
 #include <linux/string.h>
 #include <linux/workqueue.h>
 #include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "send.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 #include "tvlv.h"
@@ -393,7 +400,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
            batadv_arp_hw_src(skb, hdr_size), &ip_src,
            batadv_arp_hw_dst(skb, hdr_size), &ip_dst);
 
-    if (hdr_size == 0)
+    if (hdr_size < sizeof(struct batadv_unicast_packet))
         return;
 
     unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
@@ -852,6 +859,151 @@ out:
 #endif
 
 /**
+ * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a
+ *  netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @dat_entry: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                struct batadv_dat_entry *dat_entry)
+{
+    int msecs;
+    void *hdr;
+
+    hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+              NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+    if (!hdr)
+        return -ENOBUFS;
+
+    msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
+
+    if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+                dat_entry->ip) ||
+        nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN,
+            dat_entry->mac_addr) ||
+        nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid) ||
+        nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+        genlmsg_cancel(msg, hdr);
+        return -EMSGSIZE;
+    }
+
+    genlmsg_end(msg, hdr);
+    return 0;
+}
+
+/**
+ * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to
+ *  a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                 struct hlist_head *head, int *idx_skip)
+{
+    struct batadv_dat_entry *dat_entry;
+    int idx = 0;
+
+    rcu_read_lock();
+    hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+        if (idx < *idx_skip)
+            goto skip;
+
+        if (batadv_dat_cache_dump_entry(msg, portid, seq,
+                        dat_entry)) {
+            rcu_read_unlock();
+            *idx_skip = idx;
+
+            return -EMSGSIZE;
+        }
+
+skip:
+        idx++;
+    }
+    rcu_read_unlock();
+
+    return 0;
+}
+
+/**
+ * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+    struct batadv_hard_iface *primary_if = NULL;
+    int portid = NETLINK_CB(cb->skb).portid;
+    struct net *net = sock_net(cb->skb->sk);
+    struct net_device *soft_iface;
+    struct batadv_hashtable *hash;
+    struct batadv_priv *bat_priv;
+    int bucket = cb->args[0];
+    struct hlist_head *head;
+    int idx = cb->args[1];
+    int ifindex;
+    int ret = 0;
+
+    ifindex = batadv_netlink_get_ifindex(cb->nlh,
+                         BATADV_ATTR_MESH_IFINDEX);
+    if (!ifindex)
+        return -EINVAL;
+
+    soft_iface = dev_get_by_index(net, ifindex);
+    if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+        ret = -ENODEV;
+        goto out;
+    }
+
+    bat_priv = netdev_priv(soft_iface);
+    hash = bat_priv->dat.hash;
+
+    primary_if = batadv_primary_if_get_selected(bat_priv);
+    if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+        ret = -ENOENT;
+        goto out;
+    }
+
+    while (bucket < hash->size) {
+        head = &hash->table[bucket];
+
+        if (batadv_dat_cache_dump_bucket(msg, portid,
+                         cb->nlh->nlmsg_seq, head,
+                         &idx))
+            break;
+
+        bucket++;
+        idx = 0;
+    }
+
+    cb->args[0] = bucket;
+    cb->args[1] = idx;
+
+    ret = msg->len;
+
+out:
+    if (primary_if)
+        batadv_hardif_put(primary_if);
+
+    if (soft_iface)
+        dev_put(soft_iface);
+
+    return ret;
+}
+
+/**
  * batadv_arp_get_type() - parse an ARP packet and gets the type
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: packet to analyse
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index e24aa9601c52..a04596028337 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -28,6 +28,7 @@
 
 #include "originator.h"
 
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -81,6 +82,7 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
 int batadv_dat_init(struct batadv_priv *bat_priv);
 void batadv_dat_free(struct batadv_priv *bat_priv);
 int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb);
 
 /**
  * batadv_dat_inc_counter() - increment the correct DAT packet counter
@@ -169,6 +171,12 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv)
 {
 }
 
+static inline int
+batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+    return -EOPNOTSUPP;
+}
+
 static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
                       u8 subtype)
 {
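batadv_dat_cache_dump() above is a standard netlink dump callback: when the message buffer fills mid-table it saves a (bucket, index) cursor in cb->args[] and resumes from that cursor on the next invocation. A self-contained user-space C sketch of that resume-cursor pattern follows; the names (emit, dump) and the 5-entry "buffer" are illustrative, not kernel API.

#include <stdio.h>

#define BUCKETS    4
#define PER_BUCKET 3

static int emitted;

/* pretend the message buffer only holds 5 entries per pass */
static int emit(long bucket, long idx)
{
    if (emitted == 5)
        return -1;    /* -EMSGSIZE in the kernel */
    printf("entry %ld.%ld\n", bucket, idx);
    emitted++;
    return 0;
}

static int dump(long args[2])
{
    long bucket = args[0];
    long idx = args[1];

    while (bucket < BUCKETS) {
        for (; idx < PER_BUCKET; idx++) {
            if (emit(bucket, idx)) {
                /* out of room: save cursor, resume here next call */
                args[0] = bucket;
                args[1] = idx;
                return 1;
            }
        }
        bucket++;
        idx = 0;
    }
    args[0] = bucket;
    args[1] = 0;
    return 0;    /* table fully dumped */
}

int main(void)
{
    long args[2] = { 0, 0 };

    do {
        emitted = 0;    /* a fresh "skb" for each pass */
    } while (dump(args));
    return 0;
}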
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index d815acc13c35..0fddc17106bd 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -288,7 +288,8 @@ batadv_frag_merge_packets(struct hlist_head *chain)
     /* Move the existing MAC header to just before the payload. (Override
      * the fragment header.)
      */
-    skb_pull_rcsum(skb_out, hdr_size);
+    skb_pull(skb_out, hdr_size);
+    skb_out->ip_summed = CHECKSUM_NONE;
     memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
     skb_set_mac_header(skb_out, -ETH_HLEN);
     skb_reset_network_header(skb_out);
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index fd4a263dd6b7..c405d15befd6 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -763,6 +763,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
     hard_iface->soft_iface = soft_iface;
     bat_priv = netdev_priv(hard_iface->soft_iface);
 
+    if (bat_priv->num_ifaces >= UINT_MAX) {
+        ret = -ENOSPC;
+        goto err_dev;
+    }
+
     ret = netdev_master_upper_dev_link(hard_iface->net_dev,
                        soft_iface, NULL, NULL, NULL);
     if (ret)
@@ -876,7 +881,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
     batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface);
 
     /* nobody uses this interface anymore */
-    if (!bat_priv->num_ifaces) {
+    if (bat_priv->num_ifaces == 0) {
         batadv_gw_check_client_stop(bat_priv);
 
         if (autodel == BATADV_IF_CLEANUP_AUTO)
@@ -912,7 +917,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
     if (ret)
         goto free_if;
 
-    hard_iface->if_num = -1;
+    hard_iface->if_num = 0;
     hard_iface->net_dev = net_dev;
     hard_iface->soft_iface = NULL;
     hard_iface->if_status = BATADV_IF_NOT_IN_USE;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 7d5e9abb7a65..55c358ad3331 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -24,6 +24,7 @@
 #include <linux/debugfs.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
+#include <linux/eventpoll.h>
 #include <linux/export.h>
 #include <linux/fcntl.h>
 #include <linux/fs.h>
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 52d8a4b848c0..853773e45f79 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -22,6 +22,7 @@
 #include <linux/compiler.h>
 #include <linux/debugfs.h>
 #include <linux/errno.h>
+#include <linux/eventpoll.h>
 #include <linux/export.h>
 #include <linux/fcntl.h>
 #include <linux/fs.h>
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 6eaffe50335a..de3a055f7dd8 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -52,14 +53,20 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <net/addrconf.h>
+#include <net/genetlink.h>
 #include <net/if_inet6.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/sock.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 #include "tvlv.h"
@@ -102,7 +109,36 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
 }
 
 /**
+ * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv4 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+{
+    static const u8 prefix[] = {0x01, 0x00, 0x5E};
+
+    return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
+ * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv6 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+{
+    static const u8 prefix[] = {0x33, 0x33};
+
+    return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
  * batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
  * @dev: the device to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
  *
@@ -119,9 +155,12 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
  * Return: -ENOMEM on memory allocation error or the number of
  * items added to the mcast_list otherwise.
  */
-static int batadv_mcast_mla_softif_get(struct net_device *dev,
+static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
+                       struct net_device *dev,
                        struct hlist_head *mcast_list)
 {
+    bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+    bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
     struct net_device *bridge = batadv_mcast_get_bridge(dev);
     struct netdev_hw_addr *mc_list_entry;
     struct batadv_hw_addr *new;
@@ -129,6 +168,12 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
     netif_addr_lock_bh(bridge ? bridge : dev);
     netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
+        if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+            continue;
+
+        if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+            continue;
+
         new = kmalloc(sizeof(*new), GFP_ATOMIC);
         if (!new) {
             ret = -ENOMEM;
@@ -193,6 +238,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
 
 /**
  * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
  * @dev: a bridge slave whose bridge to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
  *
@@ -204,10 +250,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
  * Return: -ENOMEM on memory allocation error or the number of
  * items added to the mcast_list otherwise.
  */
-static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
+                       struct net_device *dev,
                        struct hlist_head *mcast_list)
 {
     struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+    bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+    bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
     struct br_ip_list *br_ip_entry, *tmp;
     struct batadv_hw_addr *new;
     u8 mcast_addr[ETH_ALEN];
@@ -221,6 +270,12 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
         goto out;
 
     list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+        if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
+            continue;
+
+        if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
+            continue;
+
         batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
         if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
             continue;
@@ -543,8 +598,8 @@ update:
         bat_priv->mcast.enabled = true;
     }
 
-    return !(mcast_data.flags &
-         (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
+    return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 &&
+         mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6);
 }
 
 /**
@@ -568,11 +623,11 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
     if (!batadv_mcast_mla_tvlv_update(bat_priv))
         goto update;
 
-    ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+    ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
     if (ret < 0)
         goto out;
 
-    ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+    ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
     if (ret < 0)
         goto out;
@@ -1286,6 +1341,236 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
 #endif
 
 /**
+ * batadv_mcast_mesh_info_put() - put multicast info into a netlink message
+ * @msg: buffer for the message
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 or error code.
+ */
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+                   struct batadv_priv *bat_priv)
+{
+    u32 flags = bat_priv->mcast.flags;
+    u32 flags_priv = BATADV_NO_FLAGS;
+
+    if (bat_priv->mcast.bridged) {
+        flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
+
+        if (bat_priv->mcast.querier_ipv4.exists)
+            flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
+        if (bat_priv->mcast.querier_ipv6.exists)
+            flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
+        if (bat_priv->mcast.querier_ipv4.shadowing)
+            flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
+        if (bat_priv->mcast.querier_ipv6.shadowing)
+            flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
+    }
+
+    if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) ||
+        nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv))
+        return -EMSGSIZE;
+
+    return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags
+ *  table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @orig_node: originator to dump the multicast flags of
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                  struct batadv_orig_node *orig_node)
+{
+    void *hdr;
+
+    hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+              NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+    if (!hdr)
+        return -ENOBUFS;
+
+    if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+            orig_node->orig)) {
+        genlmsg_cancel(msg, hdr);
+        return -EMSGSIZE;
+    }
+
+    if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+             &orig_node->capabilities)) {
+        if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS,
+                orig_node->mcast_flags)) {
+            genlmsg_cancel(msg, hdr);
+            return -EMSGSIZE;
+        }
+    }
+
+    genlmsg_end(msg, hdr);
+    return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
+ *  table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                   struct hlist_head *head, long *idx_skip)
+{
+    struct batadv_orig_node *orig_node;
+    long idx = 0;
+
+    rcu_read_lock();
+    hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+        if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+                  &orig_node->capa_initialized))
+            continue;
+
+        if (idx < *idx_skip)
+            goto skip;
+
+        if (batadv_mcast_flags_dump_entry(msg, portid, seq,
+                          orig_node)) {
+            rcu_read_unlock();
+            *idx_skip = idx;
+
+            return -EMSGSIZE;
+        }
+
+skip:
+        idx++;
+    }
+    rcu_read_unlock();
+
+    return 0;
+}
+
+/**
+ * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bucket: current bucket to dump
+ * @idx: index in current bucket to the next entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+              struct batadv_priv *bat_priv, long *bucket, long *idx)
+{
+    struct batadv_hashtable *hash = bat_priv->orig_hash;
+    long bucket_tmp = *bucket;
+    struct hlist_head *head;
+    long idx_tmp = *idx;
+
+    while (bucket_tmp < hash->size) {
+        head = &hash->table[bucket_tmp];
+
+        if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
+                           &idx_tmp))
+            break;
+
+        bucket_tmp++;
+        idx_tmp = 0;
+    }
+
+    *bucket = bucket_tmp;
+    *idx = idx_tmp;
+
+    return msg->len;
+}
+
+/**
+ * batadv_mcast_netlink_get_primary() - get primary interface from netlink
+ *  callback
+ * @cb: netlink callback structure
+ * @primary_if: the primary interface pointer to return the result in
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
+                 struct batadv_hard_iface **primary_if)
+{
+    struct batadv_hard_iface *hard_iface = NULL;
+    struct net *net = sock_net(cb->skb->sk);
+    struct net_device *soft_iface;
+    struct batadv_priv *bat_priv;
+    int ifindex;
+    int ret = 0;
+
+    ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+    if (!ifindex)
+        return -EINVAL;
+
+    soft_iface = dev_get_by_index(net, ifindex);
+    if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+        ret = -ENODEV;
+        goto out;
+    }
+
+    bat_priv = netdev_priv(soft_iface);
+
+    hard_iface = batadv_primary_if_get_selected(bat_priv);
+    if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) {
+        ret = -ENOENT;
+        goto out;
+    }
+
+out:
+    if (soft_iface)
+        dev_put(soft_iface);
+
+    if (!ret && primary_if)
+        *primary_if = hard_iface;
+    else
+        batadv_hardif_put(hard_iface);
+
+    return ret;
+}
+
+/**
+ * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+    struct batadv_hard_iface *primary_if = NULL;
+    int portid = NETLINK_CB(cb->skb).portid;
+    struct batadv_priv *bat_priv;
+    long *bucket = &cb->args[0];
+    long *idx = &cb->args[1];
+    int ret;
+
+    ret = batadv_mcast_netlink_get_primary(cb, &primary_if);
+    if (ret)
+        return ret;
+
+    bat_priv = netdev_priv(primary_if->soft_iface);
+    ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
+                    bat_priv, bucket, idx);
+
+    batadv_hardif_put(primary_if);
+    return ret;
+}
+
+/**
  * batadv_mcast_free() - free the multicast optimizations structures
  * @bat_priv: the bat priv with all the soft interface information
  */
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 6b8594e23da3..3b04ab13f0eb 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -21,6 +21,7 @@
 
 #include "main.h"
 
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -54,6 +55,11 @@ void batadv_mcast_init(struct batadv_priv *bat_priv);
 
 int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
 
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+                   struct batadv_priv *bat_priv);
+
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb);
+
 void batadv_mcast_free(struct batadv_priv *bat_priv);
 
 void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
@@ -72,6 +78,18 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
     return 0;
 }
 
+static inline int
+batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv)
+{
+    return 0;
+}
+
+static inline int batadv_mcast_flags_dump(struct sk_buff *msg,
+                      struct netlink_callback *cb)
+{
+    return -EOPNOTSUPP;
+}
+
 static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
 {
 }
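The batadv_mcast_addr_is_ipv4()/batadv_mcast_addr_is_ipv6() helpers added in multicast.c above test the well-known group-MAC prefixes: IPv4 multicast maps to 01:00:5e:xx:xx:xx (RFC 1112) and IPv6 multicast to 33:33:xx:xx:xx:xx (RFC 2464). A standalone C restatement with a quick check:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

typedef unsigned char u8;

static bool mcast_addr_is_ipv4(const u8 *addr)
{
    static const u8 prefix[] = {0x01, 0x00, 0x5E};

    return memcmp(prefix, addr, sizeof(prefix)) == 0;
}

static bool mcast_addr_is_ipv6(const u8 *addr)
{
    static const u8 prefix[] = {0x33, 0x33};

    return memcmp(prefix, addr, sizeof(prefix)) == 0;
}

int main(void)
{
    const u8 v4[6] = {0x01, 0x00, 0x5e, 0x00, 0x00, 0x01}; /* 224.0.0.1 */
    const u8 v6[6] = {0x33, 0x33, 0x00, 0x00, 0x00, 0x01}; /* ff02::1 */

    printf("v4: %d, v6: %d\n", mcast_addr_is_ipv4(v4), mcast_addr_is_ipv6(v6));
    return 0;
}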
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 129af56b944d..0d9459b69bdb 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -45,8 +45,10 @@
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
 #include "gateway_client.h"
 #include "hard-interface.h"
+#include "multicast.h"
 #include "originator.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
@@ -64,39 +66,44 @@ static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
 };
 
 static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
-    [BATADV_ATTR_VERSION]       = { .type = NLA_STRING },
-    [BATADV_ATTR_ALGO_NAME]     = { .type = NLA_STRING },
-    [BATADV_ATTR_MESH_IFINDEX]  = { .type = NLA_U32 },
-    [BATADV_ATTR_MESH_IFNAME]   = { .type = NLA_STRING },
-    [BATADV_ATTR_MESH_ADDRESS]  = { .len = ETH_ALEN },
-    [BATADV_ATTR_HARD_IFINDEX]  = { .type = NLA_U32 },
-    [BATADV_ATTR_HARD_IFNAME]   = { .type = NLA_STRING },
-    [BATADV_ATTR_HARD_ADDRESS]  = { .len = ETH_ALEN },
-    [BATADV_ATTR_ORIG_ADDRESS]  = { .len = ETH_ALEN },
-    [BATADV_ATTR_TPMETER_RESULT]    = { .type = NLA_U8 },
-    [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
-    [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
-    [BATADV_ATTR_TPMETER_COOKIE]    = { .type = NLA_U32 },
-    [BATADV_ATTR_ACTIVE]        = { .type = NLA_FLAG },
-    [BATADV_ATTR_TT_ADDRESS]    = { .len = ETH_ALEN },
-    [BATADV_ATTR_TT_TTVN]       = { .type = NLA_U8 },
-    [BATADV_ATTR_TT_LAST_TTVN]  = { .type = NLA_U8 },
-    [BATADV_ATTR_TT_CRC32]      = { .type = NLA_U32 },
-    [BATADV_ATTR_TT_VID]        = { .type = NLA_U16 },
-    [BATADV_ATTR_TT_FLAGS]      = { .type = NLA_U32 },
-    [BATADV_ATTR_FLAG_BEST]     = { .type = NLA_FLAG },
-    [BATADV_ATTR_LAST_SEEN_MSECS]   = { .type = NLA_U32 },
-    [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
-    [BATADV_ATTR_TQ]        = { .type = NLA_U8 },
-    [BATADV_ATTR_THROUGHPUT]    = { .type = NLA_U32 },
-    [BATADV_ATTR_BANDWIDTH_UP]  = { .type = NLA_U32 },
-    [BATADV_ATTR_BANDWIDTH_DOWN]    = { .type = NLA_U32 },
-    [BATADV_ATTR_ROUTER]        = { .len = ETH_ALEN },
-    [BATADV_ATTR_BLA_OWN]       = { .type = NLA_FLAG },
-    [BATADV_ATTR_BLA_ADDRESS]   = { .len = ETH_ALEN },
-    [BATADV_ATTR_BLA_VID]       = { .type = NLA_U16 },
-    [BATADV_ATTR_BLA_BACKBONE]  = { .len = ETH_ALEN },
-    [BATADV_ATTR_BLA_CRC]       = { .type = NLA_U16 },
+    [BATADV_ATTR_VERSION]           = { .type = NLA_STRING },
+    [BATADV_ATTR_ALGO_NAME]         = { .type = NLA_STRING },
+    [BATADV_ATTR_MESH_IFINDEX]      = { .type = NLA_U32 },
+    [BATADV_ATTR_MESH_IFNAME]       = { .type = NLA_STRING },
+    [BATADV_ATTR_MESH_ADDRESS]      = { .len = ETH_ALEN },
+    [BATADV_ATTR_HARD_IFINDEX]      = { .type = NLA_U32 },
+    [BATADV_ATTR_HARD_IFNAME]       = { .type = NLA_STRING },
+    [BATADV_ATTR_HARD_ADDRESS]      = { .len = ETH_ALEN },
+    [BATADV_ATTR_ORIG_ADDRESS]      = { .len = ETH_ALEN },
+    [BATADV_ATTR_TPMETER_RESULT]        = { .type = NLA_U8 },
+    [BATADV_ATTR_TPMETER_TEST_TIME]     = { .type = NLA_U32 },
+    [BATADV_ATTR_TPMETER_BYTES]     = { .type = NLA_U64 },
+    [BATADV_ATTR_TPMETER_COOKIE]        = { .type = NLA_U32 },
+    [BATADV_ATTR_ACTIVE]            = { .type = NLA_FLAG },
+    [BATADV_ATTR_TT_ADDRESS]        = { .len = ETH_ALEN },
+    [BATADV_ATTR_TT_TTVN]           = { .type = NLA_U8 },
+    [BATADV_ATTR_TT_LAST_TTVN]      = { .type = NLA_U8 },
+    [BATADV_ATTR_TT_CRC32]          = { .type = NLA_U32 },
+    [BATADV_ATTR_TT_VID]            = { .type = NLA_U16 },
+    [BATADV_ATTR_TT_FLAGS]          = { .type = NLA_U32 },
+    [BATADV_ATTR_FLAG_BEST]         = { .type = NLA_FLAG },
+    [BATADV_ATTR_LAST_SEEN_MSECS]       = { .type = NLA_U32 },
+    [BATADV_ATTR_NEIGH_ADDRESS]     = { .len = ETH_ALEN },
+    [BATADV_ATTR_TQ]            = { .type = NLA_U8 },
+    [BATADV_ATTR_THROUGHPUT]        = { .type = NLA_U32 },
+    [BATADV_ATTR_BANDWIDTH_UP]      = { .type = NLA_U32 },
+    [BATADV_ATTR_BANDWIDTH_DOWN]        = { .type = NLA_U32 },
+    [BATADV_ATTR_ROUTER]            = { .len = ETH_ALEN },
+    [BATADV_ATTR_BLA_OWN]           = { .type = NLA_FLAG },
+    [BATADV_ATTR_BLA_ADDRESS]       = { .len = ETH_ALEN },
+    [BATADV_ATTR_BLA_VID]           = { .type = NLA_U16 },
+    [BATADV_ATTR_BLA_BACKBONE]      = { .len = ETH_ALEN },
+    [BATADV_ATTR_BLA_CRC]           = { .type = NLA_U16 },
+    [BATADV_ATTR_DAT_CACHE_IP4ADDRESS]  = { .type = NLA_U32 },
+    [BATADV_ATTR_DAT_CACHE_HWADDRESS]   = { .len = ETH_ALEN },
+    [BATADV_ATTR_DAT_CACHE_VID]     = { .type = NLA_U16 },
+    [BATADV_ATTR_MCAST_FLAGS]       = { .type = NLA_U32 },
+    [BATADV_ATTR_MCAST_FLAGS_PRIV]      = { .type = NLA_U32 },
 };
 
 /**
@@ -147,6 +154,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
         goto out;
 #endif
 
+    if (batadv_mcast_mesh_info_put(msg, bat_priv))
+        goto out;
+
     primary_if = batadv_primary_if_get_selected(bat_priv);
     if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
         hard_iface = primary_if->net_dev;
@@ -604,6 +614,18 @@ static const struct genl_ops batadv_netlink_ops[] = {
         .policy = batadv_netlink_policy,
         .dumpit = batadv_bla_backbone_dump,
     },
+    {
+        .cmd = BATADV_CMD_GET_DAT_CACHE,
+        .flags = GENL_ADMIN_PERM,
+        .policy = batadv_netlink_policy,
+        .dumpit = batadv_dat_cache_dump,
+    },
+    {
+        .cmd = BATADV_CMD_GET_MCAST_FLAGS,
+        .flags = GENL_ADMIN_PERM,
+        .policy = batadv_netlink_policy,
+        .dumpit = batadv_mcast_flags_dump,
+    },
 };
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 2a51a0cbb82a..716e5b43acfa 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1569,7 +1569,7 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
  * Return: 0 on success or negative error number in case of failure
  */
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
-                int max_if_num)
+                unsigned int max_if_num)
 {
     struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
     struct batadv_algo_ops *bao = bat_priv->algo_ops;
@@ -1611,7 +1611,7 @@ err:
  * Return: 0 on success or negative error number in case of failure
  */
 int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
-                int max_if_num)
+                unsigned int max_if_num)
 {
     struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
     struct batadv_hashtable *hash = bat_priv->orig_hash;
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index f3601ab0872e..3b3f59b881e1 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -73,9 +73,9 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
-                int max_if_num);
+                unsigned int max_if_num);
 int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
-                int max_if_num);
+                unsigned int max_if_num);
 struct batadv_orig_node_vlan *
 batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
               unsigned short vid);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 289df027ecdd..cc3ed93a6d51 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -759,6 +759,7 @@ free_skb:
 /**
  * batadv_reroute_unicast_packet() - update the unicast header for re-routing
  * @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast packet to process
  * @unicast_packet: the unicast header to be updated
  * @dst_addr: the payload destination
  * @vid: VLAN identifier
@@ -770,7 +771,7 @@ free_skb:
  * Return: true if the packet header has been updated, false otherwise
  */
 static bool
-batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
+batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
                   struct batadv_unicast_packet *unicast_packet,
                   u8 *dst_addr, unsigned short vid)
 {
@@ -799,8 +800,10 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
     }
 
     /* update the packet header */
+    skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
     ether_addr_copy(unicast_packet->dest, orig_addr);
     unicast_packet->ttvn = orig_ttvn;
+    skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 
     ret = true;
 out:
@@ -841,7 +844,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
      * the packet to
      */
     if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
-        if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+        if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
                           ethhdr->h_dest, vid))
             batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
@@ -887,7 +890,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
      * destination can possibly be updated and forwarded towards the new
      * target host
      */
-    if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+    if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
                       ethhdr->h_dest, vid)) {
         batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
                        "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
@@ -910,12 +913,14 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
     if (!primary_if)
         return false;
 
+    /* update the packet header */
+    skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
     ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
+    unicast_packet->ttvn = curr_ttvn;
+    skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 
     batadv_hardif_put(primary_if);
 
-    unicast_packet->ttvn = curr_ttvn;
-
     return true;
 }
 
@@ -968,14 +973,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
     struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL;
     int check, hdr_size = sizeof(*unicast_packet);
     enum batadv_subtype subtype;
-    struct ethhdr *ethhdr;
     int ret = NET_RX_DROP;
     bool is4addr, is_gw;
 
     unicast_packet = (struct batadv_unicast_packet *)skb->data;
-    unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
-    ethhdr = eth_hdr(skb);
-
     is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
     /* the caller function should have already pulled 2 bytes */
     if (is4addr)
@@ -995,12 +996,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
     if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
         goto free_skb;
 
+    unicast_packet = (struct batadv_unicast_packet *)skb->data;
+
     /* packet for me */
     if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
         /* If this is a unicast packet from another backgone gw,
          * drop it.
          */
-        orig_addr_gw = ethhdr->h_source;
+        orig_addr_gw = eth_hdr(skb)->h_source;
         orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw);
         if (orig_node_gw) {
             is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw,
@@ -1015,6 +1018,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
         }
 
         if (is4addr) {
+            unicast_4addr_packet =
+                (struct batadv_unicast_4addr_packet *)skb->data;
             subtype = unicast_4addr_packet->subtype;
             batadv_dat_inc_counter(bat_priv, subtype);
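The skb_postpull_rcsum()/skb_postpush_rcsum() pair added around the header rewrite above exists because, with CHECKSUM_COMPLETE, the device handed up a checksum over the whole packet: any bytes the stack mutates must be subtracted out of that running sum and the rewritten bytes folded back in. A simplified user-space illustration of the idea follows, using a plain 32-bit accumulator rather than the kernel's folded ones'-complement csum; it only demonstrates the subtract-rewrite-re-add bookkeeping.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* toy checksum: sum of big-endian 16-bit words, unfolded */
static uint32_t csum(const uint8_t *p, size_t len)
{
    uint32_t sum = 0;

    for (size_t i = 0; i + 1 < len; i += 2)
        sum += (uint32_t)((p[i] << 8) | p[i + 1]);
    return sum;
}

int main(void)
{
    uint8_t pkt[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    uint32_t sum = csum(pkt, sizeof(pkt));    /* "device" checksum */

    /* "postpull": remove the header bytes we are about to rewrite */
    sum -= csum(pkt, 4);

    memset(pkt, 0xaa, 4);    /* rewrite the header */

    /* "postpush": fold the rewritten header bytes back in */
    sum += csum(pkt, 4);

    printf("adjusted %u, recomputed %u\n", sum, csum(pkt, sizeof(pkt)));
    return 0;
}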
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index c95e2b2677fd..edeffcb9f3a2 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -459,13 +459,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
 
     /* skb->dev & skb->pkt_type are set here */
     skb->protocol = eth_type_trans(skb, soft_iface);
-
-    /* should not be necessary anymore as we use skb_pull_rcsum()
-     * TODO: please verify this and remove this TODO
-     * -- Dec 21st 2009, Simon Wunderlich
-     */
-
-    /* skb->ip_summed = CHECKSUM_UNNECESSARY; */
+    skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 
     batadv_inc_counter(bat_priv, BATADV_CNT_RX);
     batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 4a3b8837e1b5..476b052ad982 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -167,7 +167,7 @@ struct batadv_hard_iface {
     struct list_head list;
 
     /** @if_num: identificator of the interface */
-    s16 if_num;
+    unsigned int if_num;
 
     /** @if_status: status of the interface for batman-adv */
     char if_status;
@@ -1596,7 +1596,7 @@ struct batadv_priv {
     atomic_t batman_queue_left;
 
     /** @num_ifaces: number of interfaces assigned to this mesh interface */
-    char num_ifaces;
+    unsigned int num_ifaces;
 
     /** @mesh_obj: kobject for sysfs mesh subdirectory */
     struct kobject *mesh_obj;
@@ -2186,15 +2186,16 @@ struct batadv_algo_orig_ops {
      * orig_node due to a new hard-interface being added into the mesh
      * (optional)
      */
-    int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
+    int (*add_if)(struct batadv_orig_node *orig_node,
+              unsigned int max_if_num);
 
     /**
      * @del_if: ask the routing algorithm to apply the needed changes to the
      * orig_node due to an hard-interface being removed from the mesh
      * (optional)
      */
-    int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
-              int del_if_num);
+    int (*del_if)(struct batadv_orig_node *orig_node,
+              unsigned int max_if_num, unsigned int del_if_num);
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
     /** @print: print the originator table (optional) */
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 5f3074cb6b4d..5e44d842cc5d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -210,8 +210,8 @@ static ssize_t show_channel(struct device *tty_dev, struct device_attribute *attr,
     return sprintf(buf, "%d\n", dev->channel);
 }
 
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(channel, 0444, show_channel, NULL);
 
 static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
                        struct rfcomm_dlc *dlc)
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 01117ae84f1d..a2ddae2f37d7 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2296,8 +2296,14 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
     else
         sec_level = authreq_to_seclevel(auth);
 
-    if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK))
+    if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) {
+        /* If link is already encrypted with sufficient security we
+         * still need refresh encryption as per Core Spec 5.0 Vol 3,
+         * Part H 2.4.6
+         */
+        smp_ltk_encrypt(conn, hcon->sec_level);
         return 0;
+    }
 
     if (sec_level > hcon->pending_sec_level)
         hcon->pending_sec_level = sec_level;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 7770481a6506..26e1616b2c90 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 
     switch (event) {
     case NETDEV_CHANGEMTU:
-        dev_set_mtu(br->dev, br_min_mtu(br));
+        dev_set_mtu(br->dev, br_mtu(br));
         break;
 
     case NETDEV_CHANGEADDR:
@@ -188,7 +188,6 @@ static void __net_exit br_net_exit(struct net *net)
 
 static struct pernet_operations br_net_ops = {
     .exit = br_net_exit,
-    .async = true,
 };
 
 static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 1285ca30ab0a..278fc999d355 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -224,7 +224,7 @@ static void br_get_stats64(struct net_device *dev,
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
     struct net_bridge *br = netdev_priv(dev);
-    if (new_mtu > br_min_mtu(br))
+    if (new_mtu > br_mtu(br))
         return -EINVAL;
 
     dev->mtu = new_mtu;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 9ba4ed65c52b..87b2afd455c7 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -424,8 +424,18 @@ int br_del_bridge(struct net *net, const char *name)
     return ret;
 }
 
+static bool min_mtu(int a, int b)
+{
+    return a < b ? 1 : 0;
+}
+
+static bool max_mtu(int a, int b)
+{
+    return a > b ? 1 : 0;
+}
+
 /* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
-int br_min_mtu(const struct net_bridge *br)
+static int __br_mtu(const struct net_bridge *br, bool (compare_fn)(int, int))
 {
     const struct net_bridge_port *p;
     int mtu = 0;
@@ -436,13 +446,21 @@ int br_min_mtu(const struct net_bridge *br)
         mtu = ETH_DATA_LEN;
     else {
         list_for_each_entry(p, &br->port_list, list) {
-            if (!mtu || p->dev->mtu < mtu)
+            if (!mtu || compare_fn(p->dev->mtu, mtu))
                 mtu = p->dev->mtu;
         }
     }
     return mtu;
 }
 
+int br_mtu(const struct net_bridge *br)
+{
+    if (br_vlan_enabled(br->dev))
+        return __br_mtu(br, max_mtu);
+    else
+        return __br_mtu(br, min_mtu);
+}
+
 static void br_set_gso_limits(struct net_bridge *br)
 {
     unsigned int gso_max_size = GSO_MAX_SIZE;
@@ -594,7 +612,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
     if (changed_addr)
         call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
 
-    dev_set_mtu(br->dev, br_min_mtu(br));
+    dev_set_mtu(br->dev, br_mtu(br));
     br_set_gso_limits(br);
 
     kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -641,7 +659,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
      */
     del_nbp(p);
 
-    dev_set_mtu(br->dev, br_min_mtu(br));
+    dev_set_mtu(br->dev, br_mtu(br));
     br_set_gso_limits(br);
 
     spin_lock_bh(&br->lock);
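The br_if.c rework above folds minimum- and maximum-MTU selection into one list walker, __br_mtu(), parameterized by a comparator function pointer (max when VLAN filtering is enabled, min otherwise). A minimal standalone C version of that comparator-driven selection pattern, with the bridge and port structures reduced to a plain array:

#include <stdbool.h>
#include <stdio.h>

static bool min_mtu(int a, int b) { return a < b; }
static bool max_mtu(int a, int b) { return a > b; }

/* pick an MTU from the port list using the supplied comparator */
static int pick_mtu(const int *mtus, int n, bool (*compare_fn)(int, int))
{
    int mtu = 0;

    for (int i = 0; i < n; i++)
        if (!mtu || compare_fn(mtus[i], mtu))
            mtu = mtus[i];
    return mtu;
}

int main(void)
{
    const int ports[] = { 1500, 9000, 1492 };

    printf("min %d, max %d\n",
           pick_mtu(ports, 3, min_mtu),
           pick_mtu(ports, 3, max_mtu));
    return 0;
}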
__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: @@ -967,7 +969,6 @@ static struct pernet_operations brnf_net_ops __read_mostly = { .exit = brnf_exit_net, .id = &brnf_net_id, .size = sizeof(struct brnf_net), - .async = true, }; static struct notifier_block brnf_notifier __read_mostly = { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8e13a64d8c99..048d5b51813b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -578,7 +578,7 @@ int br_del_bridge(struct net *net, const char *name); int br_add_if(struct net_bridge *br, struct net_device *dev, struct netlink_ext_ack *extack); int br_del_if(struct net_bridge *br, struct net_device *dev); -int br_min_mtu(const struct net_bridge *br); +int br_mtu(const struct net_bridge *br); netdev_features_t br_features_recompute(struct net_bridge *br, netdev_features_t features); void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index b1be0dcfba6b..0318a69888d4 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -893,7 +893,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj, static struct bin_attribute bridge_forward = { .attr = { .name = SYSFS_BRIDGE_FDB, - .mode = S_IRUGO, }, + .mode = 0444, }, .read = brforward_read, }; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 126a8ea73c96..fd31ad83ec7b 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -44,7 +44,7 @@ static int store_##_name(struct net_bridge_port *p, unsigned long v) \ { \ return store_flag(p, v, _mask); \ } \ -static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \ +static BRPORT_ATTR(_name, 0644, \ show_##_name, store_##_name) static int store_flag(struct net_bridge_port *p, unsigned long v, @@ -71,7 +71,7 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) return sprintf(buf, "%d\n", p->path_cost); } -static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR, +static BRPORT_ATTR(path_cost, 0644, show_path_cost, br_stp_set_path_cost); static ssize_t show_priority(struct net_bridge_port *p, char *buf) @@ -79,91 +79,91 @@ static ssize_t show_priority(struct net_bridge_port *p, char *buf) return sprintf(buf, "%d\n", p->priority); } -static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR, +static BRPORT_ATTR(priority, 0644, show_priority, br_stp_set_port_priority); static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) { return br_show_bridge_id(buf, &p->designated_root); } -static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL); +static BRPORT_ATTR(designated_root, 0444, show_designated_root, NULL); static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf) { return br_show_bridge_id(buf, &p->designated_bridge); } -static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL); +static BRPORT_ATTR(designated_bridge, 0444, show_designated_bridge, NULL); static ssize_t show_designated_port(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%d\n", p->designated_port); } -static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL); +static BRPORT_ATTR(designated_port, 0444, show_designated_port, NULL); static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%d\n", p->designated_cost); } -static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL); +static BRPORT_ATTR(designated_cost, 0444, 
show_designated_cost, NULL); static ssize_t show_port_id(struct net_bridge_port *p, char *buf) { return sprintf(buf, "0x%x\n", p->port_id); } -static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL); +static BRPORT_ATTR(port_id, 0444, show_port_id, NULL); static ssize_t show_port_no(struct net_bridge_port *p, char *buf) { return sprintf(buf, "0x%x\n", p->port_no); } -static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL); +static BRPORT_ATTR(port_no, 0444, show_port_no, NULL); static ssize_t show_change_ack(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%d\n", p->topology_change_ack); } -static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL); +static BRPORT_ATTR(change_ack, 0444, show_change_ack, NULL); static ssize_t show_config_pending(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%d\n", p->config_pending); } -static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL); +static BRPORT_ATTR(config_pending, 0444, show_config_pending, NULL); static ssize_t show_port_state(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%d\n", p->state); } -static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL); +static BRPORT_ATTR(state, 0444, show_port_state, NULL); static ssize_t show_message_age_timer(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer)); } -static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL); +static BRPORT_ATTR(message_age_timer, 0444, show_message_age_timer, NULL); static ssize_t show_forward_delay_timer(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer)); } -static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL); +static BRPORT_ATTR(forward_delay_timer, 0444, show_forward_delay_timer, NULL); static ssize_t show_hold_timer(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer)); } -static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL); +static BRPORT_ATTR(hold_timer, 0444, show_hold_timer, NULL); static int store_flush(struct net_bridge_port *p, unsigned long v) { br_fdb_delete_by_port(p->br, p, 0, 0); // Don't delete local entry return 0; } -static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); +static BRPORT_ATTR(flush, 0200, NULL, store_flush); static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf) { @@ -179,7 +179,7 @@ static int store_group_fwd_mask(struct net_bridge_port *p, return 0; } -static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, +static BRPORT_ATTR(group_fwd_mask, 0644, show_group_fwd_mask, store_group_fwd_mask); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); @@ -204,7 +204,7 @@ static int store_multicast_router(struct net_bridge_port *p, { return br_multicast_set_port_router(p, v); } -static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, +static BRPORT_ATTR(multicast_router, 0644, show_multicast_router, store_multicast_router); BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 51935270c651..9896f4975353 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -168,6 +168,8 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid masterv = br_vlan_find(vg, vid); if (WARN_ON(!masterv)) return NULL; + refcount_set(&masterv->refcnt, 1); + return masterv; } refcount_inc(&masterv->refcnt); diff --git 
a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index ce7152a12bd8..620e54f08296 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -172,18 +172,69 @@ ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par) return true; } +static bool poolsize_invalid(const struct ebt_mac_wormhash *w) +{ + return w && w->poolsize >= (INT_MAX / sizeof(struct ebt_mac_wormhash_tuple)); +} + +static bool wormhash_offset_invalid(int off, unsigned int len) +{ + if (off == 0) /* not present */ + return false; + + if (off < (int)sizeof(struct ebt_among_info) || + off % __alignof__(struct ebt_mac_wormhash)) + return true; + + off += sizeof(struct ebt_mac_wormhash); + + return off > len; +} + +static bool wormhash_sizes_valid(const struct ebt_mac_wormhash *wh, int a, int b) +{ + if (a == 0) + a = sizeof(struct ebt_among_info); + + return ebt_mac_wormhash_size(wh) + a == b; +} + static int ebt_among_mt_check(const struct xt_mtchk_param *par) { const struct ebt_among_info *info = par->matchinfo; const struct ebt_entry_match *em = container_of(par->matchinfo, const struct ebt_entry_match, data); - int expected_length = sizeof(struct ebt_among_info); + unsigned int expected_length = sizeof(struct ebt_among_info); const struct ebt_mac_wormhash *wh_dst, *wh_src; int err; + if (expected_length > em->match_size) + return -EINVAL; + + if (wormhash_offset_invalid(info->wh_dst_ofs, em->match_size) || + wormhash_offset_invalid(info->wh_src_ofs, em->match_size)) + return -EINVAL; + wh_dst = ebt_among_wh_dst(info); - wh_src = ebt_among_wh_src(info); + if (poolsize_invalid(wh_dst)) + return -EINVAL; + expected_length += ebt_mac_wormhash_size(wh_dst); + if (expected_length > em->match_size) + return -EINVAL; + + wh_src = ebt_among_wh_src(info); + if (poolsize_invalid(wh_src)) + return -EINVAL; + + if (info->wh_src_ofs < info->wh_dst_ofs) { + if (!wormhash_sizes_valid(wh_src, info->wh_src_ofs, info->wh_dst_ofs)) + return -EINVAL; + } else { + if (!wormhash_sizes_valid(wh_dst, info->wh_dst_ofs, info->wh_src_ofs)) + return -EINVAL; + } + expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index f070b5e5b9dd..276b60262981 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -77,7 +77,6 @@ static void __net_exit broute_net_exit(struct net *net) static struct pernet_operations broute_net_ops = { .init = broute_net_init, .exit = broute_net_exit, - .async = true, }; static int __init ebtable_broute_init(void) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 4151afc8efcc..c41da5fac84f 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -105,7 +105,6 @@ static void __net_exit frame_filter_net_exit(struct net *net) static struct pernet_operations frame_filter_net_ops = { .init = frame_filter_net_init, .exit = frame_filter_net_exit, - .async = true, }; static int __init ebtable_filter_init(void) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index b8da2dfe2ec5..08df7406ecb3 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -105,7 +105,6 @@ static void __net_exit frame_nat_net_exit(struct net *net) static struct pernet_operations frame_nat_net_ops = { .init = frame_nat_net_init, .exit = frame_nat_net_exit, - .async = 
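The ebt_among checks above bound user-supplied offsets and pool sizes before any pointer arithmetic is done on the match blob. A rough userspace model of the same overflow and bounds reasoning (struct sizes and names are stand-ins, not the ebtables layout):

#include <limits.h>
#include <stdbool.h>
#include <stddef.h>

struct tuple { unsigned int cmp[2]; int key; };  /* stand-in element */
struct header { int wh_dst_ofs, wh_src_ofs; };   /* stand-in fixed header */

/* poolsize * sizeof(struct tuple) must not overflow a signed int */
static bool poolsize_invalid(unsigned int poolsize)
{
	return poolsize >= INT_MAX / sizeof(struct tuple);
}

/* An offset of 0 means "not present"; otherwise it must point past the
 * fixed header, be suitably aligned, and leave room for the hash header. */
static bool offset_invalid(int off, unsigned int total_len, size_t wh_size)
{
	if (off == 0)
		return false;
	if (off < (int)sizeof(struct header) || off % sizeof(long))
		return true;
	return (size_t)off + wh_size > total_len;
}

int main(void)
{
	/* e.g. a 64-byte blob cannot hold a 16-byte hash header at offset 60 */
	return offset_invalid(60, 64, 16) && !poolsize_invalid(4) ? 0 : 1;
}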
true, }; static int __init ebtable_nat_init(void) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index a8cb543e3296..032e0fe45940 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1643,7 +1643,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, int off = ebt_compat_match_offset(match, m->match_size); compat_uint_t msize = m->match_size - off; - BUG_ON(off >= m->match_size); + if (WARN_ON(off >= m->match_size)) + return -EINVAL; if (copy_to_user(cm->u.name, match->name, strlen(match->name) + 1) || put_user(match->revision, &cm->u.revision) || @@ -1674,7 +1675,8 @@ static int compat_target_to_user(struct ebt_entry_target *t, int off = xt_compat_target_offset(target); compat_uint_t tsize = t->target_size - off; - BUG_ON(off >= t->target_size); + if (WARN_ON(off >= t->target_size)) + return -EINVAL; if (copy_to_user(cm->u.name, target->name, strlen(target->name) + 1) || put_user(target->revision, &cm->u.revision) || @@ -1910,7 +1912,8 @@ static int ebt_buf_add(struct ebt_entries_buf_state *state, if (state->buf_kern_start == NULL) goto count_only; - BUG_ON(state->buf_kern_offset + sz > state->buf_kern_len); + if (WARN_ON(state->buf_kern_offset + sz > state->buf_kern_len)) + return -EINVAL; memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz); @@ -1923,7 +1926,8 @@ static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz) { char *b = state->buf_kern_start; - BUG_ON(b && state->buf_kern_offset > state->buf_kern_len); + if (WARN_ON(b && state->buf_kern_offset > state->buf_kern_len)) + return -EINVAL; if (b != NULL && sz > 0) memset(b + state->buf_kern_offset, 0, sz); @@ -2002,8 +2006,10 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, pad = XT_ALIGN(size_kern) - size_kern; if (pad > 0 && dst) { - BUG_ON(state->buf_kern_len <= pad); - BUG_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad); + if (WARN_ON(state->buf_kern_len <= pad)) + return -EINVAL; + if (WARN_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad)) + return -EINVAL; memset(dst + size_kern, 0, pad); } return off + match_size; @@ -2053,7 +2059,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (ret < 0) return ret; - BUG_ON(ret < match32->match_size); + if (WARN_ON(ret < match32->match_size)) + return -EINVAL; growth += ret - match32->match_size; growth += ebt_compat_entry_padsize(); @@ -2063,7 +2070,9 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - WARN_ON(type == EBT_COMPAT_TARGET && size_left); + if (WARN_ON(type == EBT_COMPAT_TARGET && size_left)) + return -EINVAL; + match32 = (struct compat_ebt_entry_mwt *) buf; } @@ -2119,6 +2128,19 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, * * offsets are relative to beginning of struct ebt_entry (i.e., 0). 
*/ + for (i = 0; i < 4 ; ++i) { + if (offsets[i] > *total) + return -EINVAL; + + if (i < 3 && offsets[i] == *total) + return -EINVAL; + + if (i == 0) + continue; + if (offsets[i-1] > offsets[i]) + return -EINVAL; + } + for (i = 0, j = 1 ; j < 4 ; j++, i++) { struct compat_ebt_entry_mwt *match32; unsigned int size; @@ -2150,7 +2172,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, startoff = state->buf_user_offset - startoff; - BUG_ON(*total < startoff); + if (WARN_ON(*total < startoff)) + return -EINVAL; *total -= startoff; return 0; } @@ -2279,7 +2302,8 @@ static int compat_do_replace(struct net *net, void __user *user, state.buf_kern_len = size64; ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); - BUG_ON(ret < 0); /* parses same data again */ + if (WARN_ON(ret < 0)) + goto out_unlock; vfree(entries_tmp); tmp.entries_size = size64; diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 91bfc2ac055a..bd2b3c78f59b 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -48,7 +48,6 @@ static void __net_exit nf_log_bridge_net_exit(struct net *net) static struct pernet_operations nf_log_bridge_net_ops = { .init = nf_log_bridge_net_init, .exit = nf_log_bridge_net_exit, - .async = true, }; static int __init nf_log_bridge_init(void) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 7a78268cc572..e0adcd123f48 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -544,7 +544,6 @@ static struct pernet_operations caif_net_ops = { .exit = caif_exit_net, .id = &caif_net_id, .size = sizeof(struct caif_net), - .async = true, }; /* Initialize Caif devices list */ diff --git a/net/can/af_can.c b/net/can/af_can.c index 6da324550eec..1684ba5b51eb 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -72,7 +72,7 @@ MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>, " MODULE_ALIAS_NETPROTO(PF_CAN); static int stats_timer __read_mostly = 1; -module_param(stats_timer, int, S_IRUGO); +module_param(stats_timer, int, 0444); MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)"); static struct kmem_cache *rcv_cache __read_mostly; diff --git a/net/can/bcm.c b/net/can/bcm.c index 26730d39e048..ac5e5e34fee3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1717,7 +1717,6 @@ static void canbcm_pernet_exit(struct net *net) static struct pernet_operations canbcm_pernet_ops __read_mostly = { .init = canbcm_pernet_init, .exit = canbcm_pernet_exit, - .async = true, }; static int __init bcm_module_init(void) diff --git a/net/can/gw.c b/net/can/gw.c index 08e97668d5cf..faa3da88a127 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -72,7 +72,7 @@ MODULE_ALIAS(CAN_GW_NAME); #define CGW_DEFAULT_HOPS 1 static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS; -module_param(max_hops, uint, S_IRUGO); +module_param(max_hops, uint, 0444); MODULE_PARM_DESC(max_hops, "maximum " CAN_GW_NAME " routing hops for CAN frames " "(valid values: " __stringify(CGW_MIN_HOPS) "-" @@ -1010,7 +1010,6 @@ static void __net_exit cangw_pernet_exit(struct net *net) static struct pernet_operations cangw_pernet_ops = { .init = cangw_pernet_init, .exit = cangw_pernet_exit, - .async = true, }; static __init int cgw_module_init(void) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 1e492ef2a33d..4adf07826f4a 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -54,7 +54,7 @@ static const struct kernel_param_ops param_ops_supported_features = { .get 
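The ebtables compat conversions above stop crashing the machine (BUG_ON) on malformed, user-controlled rulesets and instead warn once and fail the syscall with -EINVAL. A small standard-C stand-in for the pattern:

#include <stdio.h>

/* userspace stand-in for the kernel's WARN_ON(): report, don't crash */
static int warn_on(int cond, const char *what)
{
	if (cond)
		fprintf(stderr, "warning: %s\n", what);
	return cond;
}

static int copy_entry(unsigned int off, unsigned int match_size)
{
	/* Old code: BUG_ON(off >= match_size) halted on a bad ruleset.
	 * New code warns and returns an error to the caller instead. */
	if (warn_on(off >= match_size, "off >= match_size"))
		return -22; /* -EINVAL */
	return 0;
}

int main(void)
{
	return copy_entry(8, 4) == -22 ? 0 : 1;
}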
= param_get_supported_features, }; module_param_cb(supported_features, &param_ops_supported_features, NULL, - S_IRUGO); + 0444); const char *ceph_msg_type_name(int type) { @@ -418,6 +418,7 @@ ceph_parse_options(char *options, const char *dev_name, opt->flags |= CEPH_OPT_FSID; break; case Opt_name: + kfree(opt->name); opt->name = kstrndup(argstr[0].from, argstr[0].to-argstr[0].from, GFP_KERNEL); @@ -427,6 +428,9 @@ ceph_parse_options(char *options, const char *dev_name, } break; case Opt_secret: + ceph_crypto_key_destroy(opt->key); + kfree(opt->key); + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); if (!opt->key) { err = -ENOMEM; @@ -437,6 +441,9 @@ ceph_parse_options(char *options, const char *dev_name, goto out; break; case Opt_key: + ceph_crypto_key_destroy(opt->key); + kfree(opt->key); + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); if (!opt->key) { err = -ENOMEM; diff --git a/net/core/dev.c b/net/core/dev.c index 8b51f923ce99..07da7add4845 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1571,6 +1571,27 @@ static void dev_disable_gro_hw(struct net_device *dev) netdev_WARN(dev, "failed to disable GRO_HW!\n"); } +const char *netdev_cmd_to_name(enum netdev_cmd cmd) +{ +#define N(val) \ + case NETDEV_##val: \ + return "NETDEV_" __stringify(val); + switch (cmd) { + N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER) + N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE) + N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE) + N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER) + N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO) + N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO) + N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) + N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) + N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) + }; +#undef N + return "UNKNOWN_NETDEV_EVENT"; +} +EXPORT_SYMBOL_GPL(netdev_cmd_to_name); + static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, struct net_device *dev) { @@ -1604,12 +1625,15 @@ int register_netdevice_notifier(struct notifier_block *nb) struct net *net; int err; + /* Close race with setup_net() and cleanup_net() */ + down_write(&pernet_ops_rwsem); rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); if (err) goto unlock; if (dev_boot_phase) goto unlock; + down_read(&net_rwsem); for_each_net(net) { for_each_netdev(net, dev) { err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); @@ -1623,9 +1647,11 @@ int register_netdevice_notifier(struct notifier_block *nb) call_netdevice_notifier(nb, NETDEV_UP, dev); } } + up_read(&net_rwsem); unlock: rtnl_unlock(); + up_write(&pernet_ops_rwsem); return err; rollback: @@ -1645,6 +1671,7 @@ rollback: } outroll: + up_read(&net_rwsem); raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } @@ -1670,11 +1697,14 @@ int unregister_netdevice_notifier(struct notifier_block *nb) struct net *net; int err; + /* Close race with setup_net() and cleanup_net() */ + down_write(&pernet_ops_rwsem); rtnl_lock(); err = raw_notifier_chain_unregister(&netdev_chain, nb); if (err) goto unlock; + down_read(&net_rwsem); for_each_net(net) { for_each_netdev(net, dev) { if (dev->flags & IFF_UP) { @@ -1685,8 +1715,10 @@ int unregister_netdevice_notifier(struct notifier_block *nb) call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } + up_read(&net_rwsem); unlock: rtnl_unlock(); + up_write(&pernet_ops_rwsem); return err; } EXPORT_SYMBOL(unregister_netdevice_notifier); @@ -3278,15 +3310,23 @@
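netdev_cmd_to_name() above generates both the case label and the returned string from a single token via the preprocessor. A compact, compilable model of the N()/__stringify trick:

#include <stdio.h>

enum cmd { CMD_UP, CMD_DOWN, CMD_REBOOT };

#define __stringify(x) #x
#define N(val) case CMD_##val: return "CMD_" __stringify(val);

static const char *cmd_to_name(enum cmd c)
{
	switch (c) {
	N(UP) N(DOWN) N(REBOOT)
	}
	return "UNKNOWN_CMD";
}

int main(void)
{
	puts(cmd_to_name(CMD_DOWN)); /* prints CMD_DOWN */
	return 0;
}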
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { - struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); + const struct netprio_map *map; + const struct sock *sk; + unsigned int prioidx; - if (!skb->priority && skb->sk && map) { - unsigned int prioidx = - sock_cgroup_prioidx(&skb->sk->sk_cgrp_data); + if (skb->priority) + return; + map = rcu_dereference_bh(skb->dev->priomap); + if (!map) + return; + sk = skb_to_full_sk(skb); + if (!sk) + return; - if (prioidx < map->priomap_len) - skb->priority = map->priomap[prioidx]; - } + prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data); + + if (prioidx < map->priomap_len) + skb->priority = map->priomap[prioidx]; } #else #define skb_update_prio(skb) @@ -4351,6 +4391,9 @@ int netdev_rx_handler_register(struct net_device *dev, if (netdev_is_rx_handler_busy(dev)) return -EBUSY; + if (dev->priv_flags & IFF_NO_RX_HANDLER) + return -EINVAL; + /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); @@ -6396,6 +6439,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, .linking = true, .upper_info = upper_info, }; + struct net_device *master_dev; int ret = 0; ASSERT_RTNL(); @@ -6407,11 +6451,14 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; - if (netdev_has_upper_dev(dev, upper_dev)) - return -EEXIST; - - if (master && netdev_master_upper_dev_get(dev)) - return -EBUSY; + if (!master) { + if (netdev_has_upper_dev(dev, upper_dev)) + return -EEXIST; + } else { + master_dev = netdev_master_upper_dev_get(dev); + if (master_dev) + return master_dev == upper_dev ? 
-EEXIST : -EBUSY; + } ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); @@ -7542,10 +7589,17 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } - /* LRO feature cannot be combined with RX-FCS */ - if ((features & NETIF_F_LRO) && (features & NETIF_F_RXFCS)) { - netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n"); - features &= ~NETIF_F_LRO; + /* LRO/HW-GRO features cannot be combined with RX-FCS */ + if (features & NETIF_F_RXFCS) { + if (features & NETIF_F_LRO) { + netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n"); + features &= ~NETIF_F_LRO; + } + + if (features & NETIF_F_GRO_HW) { + netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n"); + features &= ~NETIF_F_GRO_HW; + } } return features; @@ -7619,6 +7673,24 @@ sync_lower: } } + if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) { + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) { + dev->features = features; + err |= vlan_get_rx_ctag_filter_info(dev); + } else { + vlan_drop_rx_ctag_filter_info(dev); + } + } + + if (diff & NETIF_F_HW_VLAN_STAG_FILTER) { + if (features & NETIF_F_HW_VLAN_STAG_FILTER) { + dev->features = features; + err |= vlan_get_rx_stag_filter_info(dev); + } else { + vlan_drop_rx_stag_filter_info(dev); + } + } + dev->features = features; } @@ -8004,7 +8076,8 @@ int register_netdev(struct net_device *dev) { int err; - rtnl_lock(); + if (rtnl_lock_killable()) + return -EINTR; err = register_netdevice(dev); rtnl_unlock(); return err; @@ -8054,7 +8127,6 @@ static void netdev_wait_allrefs(struct net_device *dev) rcu_barrier(); rtnl_lock(); - call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { /* We must not have linkwatch events @@ -8126,10 +8198,6 @@ void netdev_run_todo(void) = list_first_entry(&list, struct net_device, todo_list); list_del(&dev->todo_list); - rtnl_lock(); - call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); - __rtnl_unlock(); - if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { pr_err("network todo '%s' but state %d\n", dev->name, dev->reg_state); @@ -8571,7 +8639,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); - call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); new_nsid = peernet2id_alloc(dev_net(dev), net); /* If there is an ifindex conflict assign a new one */ @@ -8847,7 +8914,6 @@ static void __net_exit netdev_exit(struct net *net) static struct pernet_operations __net_initdata netdev_net_ops = { .init = netdev_init, .exit = netdev_exit, - .async = true, }; static void __net_exit default_device_exit(struct net *net) @@ -8948,7 +9014,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) static struct pernet_operations __net_initdata default_device_ops = { .exit = default_device_exit, .exit_batch = default_device_exit_batch, - .async = true, }; /* diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 0ab1af04296c..a04e1e88bf3a 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c if (colon) *colon = 0; - dev_load(net, ifr->ifr_name); - /* * See which interface the caller is talking about. 
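The __netdev_upper_dev_link() change above distinguishes "this device already has exactly this master" (-EEXIST) from "this device has a different master" (-EBUSY), where before both returned -EBUSY. A tiny sketch of that decision (structs and names are illustrative):

#include <stddef.h>

struct dev { struct dev *master; };

/* -EEXIST if upper is already dev's master; -EBUSY if some other master
 * is set; 0 means the link may proceed. */
static int check_master_link(struct dev *dev, struct dev *upper)
{
	struct dev *master = dev->master;

	if (master)
		return master == upper ? -17 /* -EEXIST */ : -16 /* -EBUSY */;
	return 0;
}

int main(void)
{
	struct dev m1 = { NULL }, m2 = { NULL }, d = { &m1 };

	return (check_master_link(&d, &m1) == -17 &&
		check_master_link(&d, &m2) == -16) ? 0 : 1;
}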
*/ @@ -423,6 +421,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: + dev_load(net, ifr->ifr_name); rcu_read_lock(); ret = dev_ifsioc_locked(net, ifr, cmd); rcu_read_unlock(); @@ -431,6 +430,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c return ret; case SIOCETHTOOL: + dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ethtool(net, ifr); rtnl_unlock(); @@ -447,6 +447,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSIFNAME: + dev_load(net, ifr->ifr_name); if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; rtnl_lock(); @@ -494,6 +495,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: + dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); @@ -518,6 +520,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { + dev_load(net, ifr->ifr_name); rtnl_lock(); ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); diff --git a/net/core/devlink.c b/net/core/devlink.c index 88e846779269..9236e421bd62 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1695,10 +1695,11 @@ static int devlink_dpipe_table_put(struct sk_buff *skb, goto nla_put_failure; if (table->resource_valid) { - nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, - table->resource_id, DEVLINK_ATTR_PAD); - nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS, - table->resource_units, DEVLINK_ATTR_PAD); + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, + table->resource_id, DEVLINK_ATTR_PAD) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS, + table->resource_units, DEVLINK_ATTR_PAD)) + goto nla_put_failure; } if (devlink_dpipe_matches_put(table, skb)) goto nla_put_failure; @@ -1797,7 +1798,7 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } @@ -1806,7 +1807,6 @@ send_done: nla_put_failure: err = -EMSGSIZE; err_table_put: -err_skb_send_alloc: genlmsg_cancel(skb, hdr); nlmsg_free(skb); return err; @@ -2072,7 +2072,7 @@ static int devlink_dpipe_entries_fill(struct genl_info *info, table->counters_enabled, &dump_ctx); if (err) - goto err_entries_dump; + return err; send_done: nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq, @@ -2080,16 +2080,10 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } return genlmsg_reply(dump_ctx.skb, info); - -err_entries_dump: -err_skb_send_alloc: - genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr); - nlmsg_free(dump_ctx.skb); - return err; } static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, @@ -2228,7 +2222,7 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } return genlmsg_reply(skb, info); @@ -2236,7 +2230,6 @@ send_done: nla_put_failure: err = -EMSGSIZE; err_table_put: -err_skb_send_alloc: genlmsg_cancel(skb, hdr); nlmsg_free(skb); return err; @@ -2332,7 +2325,7 @@ devlink_resource_validate_children(struct devlink_resource *resource) list_for_each_entry(child_resource, 
&resource->resource_list, list) parts_size += child_resource->size_new; - if (parts_size > resource->size) + if (parts_size > resource->size_new) size_valid = false; out: resource->size_valid = size_valid; @@ -2345,17 +2338,17 @@ devlink_resource_validate_size(struct devlink_resource *resource, u64 size, u64 reminder; int err = 0; - if (size > resource->size_params->size_max) { + if (size > resource->size_params.size_max) { NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum"); err = -EINVAL; } - if (size < resource->size_params->size_min) { + if (size < resource->size_params.size_min) { NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum"); err = -EINVAL; } - div64_u64_rem(size, resource->size_params->size_granularity, &reminder); + div64_u64_rem(size, resource->size_params.size_granularity, &reminder); if (reminder) { NL_SET_ERR_MSG_MOD(extack, "Wrong granularity"); err = -EINVAL; @@ -2394,20 +2387,22 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb, return 0; } -static void +static int devlink_resource_size_params_put(struct devlink_resource *resource, struct sk_buff *skb) { struct devlink_resource_size_params *size_params; - size_params = resource->size_params; - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN, - size_params->size_granularity, DEVLINK_ATTR_PAD); - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX, - size_params->size_max, DEVLINK_ATTR_PAD); - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN, - size_params->size_min, DEVLINK_ATTR_PAD); - nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit); + size_params = &resource->size_params; + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN, + size_params->size_granularity, DEVLINK_ATTR_PAD) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX, + size_params->size_max, DEVLINK_ATTR_PAD) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN, + size_params->size_min, DEVLINK_ATTR_PAD) || + nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit)) + return -EMSGSIZE; + return 0; } static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, @@ -2431,10 +2426,12 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW, resource->size_new, DEVLINK_ATTR_PAD); if (resource->resource_ops && resource->resource_ops->occ_get) - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC, - resource->resource_ops->occ_get(devlink), - DEVLINK_ATTR_PAD); - devlink_resource_size_params_put(resource, skb); + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC, + resource->resource_ops->occ_get(devlink), + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (devlink_resource_size_params_put(resource, skb)) + goto nla_put_failure; if (list_empty(&resource->resource_list)) goto out; @@ -2739,22 +2736,22 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, .doit = devlink_nl_cmd_dpipe_table_get, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, .doit = devlink_nl_cmd_dpipe_entries_get, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, .doit = devlink_nl_cmd_dpipe_headers_get, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* 
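The devlink fixes above stop ignoring nla_put_*() return values: once one attribute fails to fit, the whole message must be cancelled rather than sent truncated. A schematic of the chained-put pattern with a stand-in put function (not the netlink API):

#include <stdio.h>

/* Stand-in for nla_put_u64 and friends: nonzero when out of room. */
static int put_attr(int *room, const char *name)
{
	if (*room <= 0)
		return -90; /* -EMSGSIZE */
	(*room)--;
	printf("put %s\n", name);
	return 0;
}

static int fill_msg(int room)
{
	/* Chain puts with ||, bail to cleanup on the first failure. */
	if (put_attr(&room, "resource_id") ||
	    put_attr(&room, "resource_units"))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	/* kernel code would genlmsg_cancel() + nlmsg_free() here */
	return -90;
}

int main(void)
{
	return (fill_msg(1) == -90 && fill_msg(2) == 0) ? 0 : 1;
}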
can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, @@ -2774,8 +2771,8 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .doit = devlink_nl_cmd_resource_dump, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_RELOAD, @@ -3169,17 +3166,19 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); */ int devlink_resource_register(struct devlink *devlink, const char *resource_name, - bool top_hierarchy, u64 resource_size, u64 resource_id, u64 parent_resource_id, - struct devlink_resource_size_params *size_params, + const struct devlink_resource_size_params *size_params, const struct devlink_resource_ops *resource_ops) { struct devlink_resource *resource; struct list_head *resource_list; + bool top_hierarchy; int err = 0; + top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP; + mutex_lock(&devlink->lock); resource = devlink_resource_find(devlink, NULL, resource_id); if (resource) { @@ -3216,7 +3215,8 @@ int devlink_resource_register(struct devlink *devlink, resource->id = resource_id; resource->resource_ops = resource_ops; resource->size_valid = true; - resource->size_params = size_params; + memcpy(&resource->size_params, size_params, + sizeof(resource->size_params)); INIT_LIST_HEAD(&resource->resource_list); list_add_tail(&resource->list, resource_list); out: diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 494e6a5d7306..eb55252ca1fb 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -22,6 +22,7 @@ #include <linux/bitops.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> +#include <linux/sfp.h> #include <linux/slab.h> #include <linux/rtnetlink.h> #include <linux/sched/signal.h> @@ -121,6 +122,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { [ETHTOOL_ID_UNSPEC] = "Unspec", [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", + [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout", }; static const char @@ -1022,6 +1024,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; + /* If FLOW_RSS was requested then user-space must be using the + * new definition, as FLOW_RSS is newer. 
+ */ + if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) { + info_size = sizeof(info); + if (copy_from_user(&info, useraddr, info_size)) + return -EFAULT; + } + if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) @@ -1251,9 +1262,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, user_key_size = rxfh.key_size; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || - rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; + /* Most drivers don't handle rss_context, check it's 0 as well */ + if (rxfh.rss_context && !ops->get_rxfh_context) + return -EOPNOTSUPP; rxfh.indir_size = dev_indir_size; rxfh.key_size = dev_key_size; @@ -1276,7 +1289,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (user_key_size) hkey = rss_config + indir_bytes; - ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); + if (rxfh.rss_context) + ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey, + &dev_hfunc, + rxfh.rss_context); + else + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); if (ret) goto out; @@ -1306,6 +1324,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u8 *hkey = NULL; u8 *rss_config; u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); + bool delete = false; if (!ops->get_rxnfc || !ops->set_rxfh) return -EOPNOTSUPP; @@ -1319,9 +1338,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EFAULT; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || - rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; + /* Most drivers don't handle rss_context, check it's 0 as well */ + if (rxfh.rss_context && !ops->set_rxfh_context) + return -EOPNOTSUPP; /* If either indir, hash key or function is valid, proceed further. * Must request at least one change: indir size, hash key or function. @@ -1346,7 +1367,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (ret) goto out; - /* rxfh.indir_size == 0 means reset the indir table to default. + /* rxfh.indir_size == 0 means reset the indir table to default (master + * context) or delete the context (other RSS contexts). * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged. 
*/ if (rxfh.indir_size && @@ -1359,9 +1381,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (ret) goto out; } else if (rxfh.indir_size == 0) { - indir = (u32 *)rss_config; - for (i = 0; i < dev_indir_size; i++) - indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + if (rxfh.rss_context == 0) { + indir = (u32 *)rss_config; + for (i = 0; i < dev_indir_size; i++) + indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + } else { + delete = true; + } } if (rxfh.key_size) { @@ -1374,15 +1400,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } - ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); + if (rxfh.rss_context) + ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc, + &rxfh.rss_context, delete); + else + ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); if (ret) goto out; - /* indicate whether rxfh was set to default */ - if (rxfh.indir_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; - else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - dev->priv_flags |= IFF_RXFH_CONFIGURED; + if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), + &rxfh.rss_context, sizeof(rxfh.rss_context))) + ret = -EFAULT; + + if (!rxfh.rss_context) { + /* indicate whether rxfh was set to default */ + if (rxfh.indir_size == 0) + dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) + dev->priv_flags |= IFF_RXFH_CONFIGURED; + } out: kfree(rss_config); @@ -2210,6 +2246,9 @@ static int __ethtool_get_module_info(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; + if (dev->sfp_bus) + return sfp_get_module_info(dev->sfp_bus, modinfo); + if (phydev && phydev->drv && phydev->drv->module_info) return phydev->drv->module_info(phydev, modinfo); @@ -2244,6 +2283,9 @@ static int __ethtool_get_module_eeprom(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; + if (dev->sfp_bus) + return sfp_get_module_eeprom(dev->sfp_bus, ee, data); + if (phydev && phydev->drv && phydev->drv->module_eeprom) return phydev->drv->module_eeprom(phydev, ee, data); @@ -2277,6 +2319,11 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna) tuna->type_id != ETHTOOL_TUNABLE_U32) return -EINVAL; break; + case ETHTOOL_PFC_PREVENTION_TOUT: + if (tuna->len != sizeof(u16) || + tuna->type_id != ETHTOOL_TUNABLE_U16) + return -EINVAL; + break; default: return -EINVAL; } @@ -2520,11 +2567,14 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr) static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) { struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM }; + int rc; if (!dev->ethtool_ops->get_fecparam) return -EOPNOTSUPP; - dev->ethtool_ops->get_fecparam(dev, &fecparam); + rc = dev->ethtool_ops->get_fecparam(dev, &fecparam); + if (rc) + return rc; if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) return -EFAULT; diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 5ace0705a3f9..13a40b831d6d 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -13,16 +13,22 @@ int call_fib_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info) { + int err; + info->net = net; - return nb->notifier_call(nb, event_type, info); + err = nb->notifier_call(nb, event_type, info); + return notifier_to_errno(err); } EXPORT_SYMBOL(call_fib_notifier); int 
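In ethtool_set_rxfh() above, indir_size == 0 keeps its old meaning ("reset the indirection table to default") only for the default RSS context; for an extra context it now means "delete that context". A sketch of the dispatch with hypothetical driver hooks, loosely modeled on set_rxfh/set_rxfh_context:

#include <stdbool.h>
#include <stdio.h>

static int set_default_rxfh(bool reset)
{
	printf(reset ? "reset indir to default\n" : "program indir\n");
	return 0;
}

static int set_context_rxfh(unsigned int ctx, bool delete)
{
	printf(delete ? "delete context %u\n" : "program context %u\n", ctx);
	return 0;
}

static int set_rxfh(unsigned int rss_context, unsigned int indir_size)
{
	bool zero = (indir_size == 0);

	if (rss_context == 0)
		return set_default_rxfh(zero);        /* 0 => reset */
	return set_context_rxfh(rss_context, zero);  /* 0 => delete */
}

int main(void)
{
	set_rxfh(0, 0);  /* reset default context */
	set_rxfh(3, 0);  /* delete context 3 */
	return 0;
}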
call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info) { + int err; + info->net = net; - return atomic_notifier_call_chain(&fib_chain, event_type, info); + err = atomic_notifier_call_chain(&fib_chain, event_type, info); + return notifier_to_errno(err); } EXPORT_SYMBOL(call_fib_notifiers); @@ -33,6 +39,7 @@ static unsigned int fib_seq_sum(void) struct net *net; rtnl_lock(); + down_read(&net_rwsem); for_each_net(net) { rcu_read_lock(); list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { @@ -43,6 +50,7 @@ static unsigned int fib_seq_sum(void) } rcu_read_unlock(); } + up_read(&net_rwsem); rtnl_unlock(); return fib_seq; @@ -171,7 +179,6 @@ static void __net_exit fib_notifier_net_exit(struct net *net) static struct pernet_operations fib_notifier_net_ops = { .init = fib_notifier_net_init, .exit = fib_notifier_net_exit, - .async = true, }; static int __init fib_notifier_init(void) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index f6f04fc0f629..33958f84c173 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -631,6 +631,11 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout_free; + err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, + extack); + if (err < 0) + goto errout_free; + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref > rule->pref) break; @@ -667,7 +672,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); @@ -1130,7 +1134,6 @@ static void __net_exit fib_rules_net_exit(struct net *net) static struct pernet_operations fib_rules_net_ops = { .init = fib_rules_net_init, .exit = fib_rules_net_exit, - .async = true, }; static int __init fib_rules_init(void) diff --git a/net/core/filter.c b/net/core/filter.c index 33edfa8372fd..00c711c5f1a2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1890,6 +1890,202 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { .arg4_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg, + struct bpf_map *, map, u32, key, u64, flags) +{ + /* If user passes invalid input drop the packet. 
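call_fib_notifier()/call_fib_notifiers() above start translating notifier chain return values back into errnos, matching the notifier_from_errno() encoding used by callees. A self-contained model of the encode/decode round trip (constants mirror the kernel's convention):

#include <stdio.h>

#define NOTIFY_OK	 0x0001
#define NOTIFY_STOP_MASK 0x8000

/* Encode an errno into a notifier return value. */
static int notifier_from_errno(int err)
{
	if (err)
		return NOTIFY_STOP_MASK | (NOTIFY_OK - err);
	return NOTIFY_OK;
}

/* Decode it again on the caller side, as call_fib_notifier() now does. */
static int notifier_to_errno(int ret)
{
	ret &= ~NOTIFY_STOP_MASK;
	return ret > NOTIFY_OK ? NOTIFY_OK - ret : 0;
}

int main(void)
{
	int ret = notifier_from_errno(-22); /* -EINVAL */

	printf("%d\n", notifier_to_errno(ret)); /* prints -22 */
	return 0;
}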
*/ + if (unlikely(flags)) + return SK_DROP; + + msg->key = key; + msg->flags = flags; + msg->map = map; + + return SK_PASS; +} + +struct sock *do_msg_redirect_map(struct sk_msg_buff *msg) +{ + struct sock *sk = NULL; + + if (msg->map) { + sk = __sock_map_lookup_elem(msg->map, msg->key); + + msg->key = 0; + msg->map = NULL; + } + + return sk; +} + +static const struct bpf_func_proto bpf_msg_redirect_map_proto = { + .func = bpf_msg_redirect_map, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes) +{ + msg->apply_bytes = bytes; + return 0; +} + +static const struct bpf_func_proto bpf_msg_apply_bytes_proto = { + .func = bpf_msg_apply_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes) +{ + msg->cork_bytes = bytes; + return 0; +} + +static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { + .func = bpf_msg_cork_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_4(bpf_msg_pull_data, + struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags) +{ + unsigned int len = 0, offset = 0, copy = 0; + struct scatterlist *sg = msg->sg_data; + int first_sg, last_sg, i, shift; + unsigned char *p, *to, *from; + int bytes = end - start; + struct page *page; + + if (unlikely(flags || end <= start)) + return -EINVAL; + + /* First find the starting scatterlist element */ + i = msg->sg_start; + do { + len = sg[i].length; + offset += len; + if (start < offset + len) + break; + i++; + if (i == MAX_SKB_FRAGS) + i = 0; + } while (i != msg->sg_end); + + if (unlikely(start >= offset + len)) + return -EINVAL; + + if (!msg->sg_copy[i] && bytes <= len) + goto out; + + first_sg = i; + + /* At this point we need to linearize multiple scatterlist + * elements or a single shared page. Either way we need to + * copy into a linear buffer exclusively owned by BPF. Then + * place the buffer in the scatterlist and fixup the original + * entries by removing the entries now in the linear buffer + * and shifting the remaining entries. For now we do not try + * to copy partial entries to avoid complexity of running out + * of sg_entry slots. The downside is reading a single byte + * will copy the entire sg entry. + */ + do { + copy += sg[i].length; + i++; + if (i == MAX_SKB_FRAGS) + i = 0; + if (bytes < copy) + break; + } while (i != msg->sg_end); + last_sg = i; + + if (unlikely(copy < end - start)) + return -EINVAL; + + page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy)); + if (unlikely(!page)) + return -ENOMEM; + p = page_address(page); + offset = 0; + + i = first_sg; + do { + from = sg_virt(&sg[i]); + len = sg[i].length; + to = p + offset; + + memcpy(to, from, len); + offset += len; + sg[i].length = 0; + put_page(sg_page(&sg[i])); + + i++; + if (i == MAX_SKB_FRAGS) + i = 0; + } while (i != last_sg); + + sg[first_sg].length = copy; + sg_set_page(&sg[first_sg], page, copy, 0); + + /* To repair sg ring we need to shift entries. If we only + * had a single entry though we can just replace it and + * be done. Otherwise walk the ring and shift the entries. 
+ */ + shift = last_sg - first_sg - 1; + if (!shift) + goto out; + + i = first_sg + 1; + do { + int move_from; + + if (i + shift >= MAX_SKB_FRAGS) + move_from = i + shift - MAX_SKB_FRAGS; + else + move_from = i + shift; + + if (move_from == msg->sg_end) + break; + + sg[i] = sg[move_from]; + sg[move_from].length = 0; + sg[move_from].page_link = 0; + sg[move_from].offset = 0; + + i++; + if (i == MAX_SKB_FRAGS) + i = 0; + } while (1); + msg->sg_end -= shift; + if (msg->sg_end < 0) + msg->sg_end += MAX_SKB_FRAGS; +out: + msg->data = sg_virt(&sg[i]) + start - offset; + msg->data_end = msg->data + bytes; + + return 0; +} + +static const struct bpf_func_proto bpf_msg_pull_data_proto = { + .func = bpf_msg_pull_data, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); @@ -2087,6 +2283,10 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_cow(skb, len_diff); if (unlikely(ret < 0)) return ret; @@ -2096,19 +2296,21 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* SKB_GSO_TCPV4 needs to be changed into * SKB_GSO_TCPV6. */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + if (shinfo->gso_type & SKB_GSO_TCPV4) { + shinfo->gso_type &= ~SKB_GSO_TCPV4; + shinfo->gso_type |= SKB_GSO_TCPV6; } /* Due to IPv6 header, MSS needs to be downgraded. */ - skb_shinfo(skb)->gso_size -= len_diff; + skb_decrease_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } skb->protocol = htons(ETH_P_IPV6); @@ -2123,6 +2325,10 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) return ret; @@ -2132,19 +2338,21 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* SKB_GSO_TCPV6 needs to be changed into * SKB_GSO_TCPV4. */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { - skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (shinfo->gso_type & SKB_GSO_TCPV6) { + shinfo->gso_type &= ~SKB_GSO_TCPV6; + shinfo->gso_type |= SKB_GSO_TCPV4; } /* Due to IPv4 header, MSS can be upgraded. */ - skb_shinfo(skb)->gso_size += len_diff; + skb_increase_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } skb->protocol = htons(ETH_P_IP); @@ -2243,6 +2451,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. 
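bpf_msg_pull_data() above walks a fixed-size scatterlist ring whose indices wrap at MAX_SKB_FRAGS. A compact userspace model of the wrapping scan used to find the element containing a given byte offset (the intended logic only; the in-kernel scan operates on sg entries in place):

#include <stdio.h>

#define RING_SIZE 17 /* MAX_SKB_FRAGS-like bound, illustrative */

/* Find the ring element that contains byte `start`, walking from
 * `first` toward `end` with wraparound. */
static int find_elem(const unsigned int *len, int first, int end,
		     unsigned int start, unsigned int *elem_off)
{
	unsigned int offset = 0;
	int i = first;

	do {
		if (start < offset + len[i]) {
			*elem_off = offset;
			return i;
		}
		offset += len[i];
		i = (i + 1) % RING_SIZE;
	} while (i != end);

	return -1; /* offset past the queued data */
}

int main(void)
{
	unsigned int len[RING_SIZE] = { [15] = 100, [16] = 100, [0] = 100 };
	unsigned int off;
	int i = find_elem(len, 15, 1, 250, &off); /* wraps 15 -> 16 -> 0 */

	printf("elem %d, base offset %u\n", i, off); /* elem 0, 200 */
	return 0;
}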
*/ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_cow(skb, len_diff); if (unlikely(ret < 0)) return ret; @@ -2252,11 +2464,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* Due to header grow, MSS needs to be downgraded. */ - skb_shinfo(skb)->gso_size -= len_diff; + skb_decrease_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } return 0; @@ -2267,6 +2481,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; + /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ + if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + return -ENOTSUPP; + ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) return ret; @@ -2276,11 +2494,13 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) return ret; if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* Due to header shrink, MSS can be upgraded. */ - skb_shinfo(skb)->gso_size += len_diff; + skb_increase_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; } return 0; @@ -2831,7 +3051,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || func == bpf_xdp_adjust_head || - func == bpf_xdp_adjust_meta) + func == bpf_xdp_adjust_meta || + func == bpf_msg_pull_data) return true; return false; @@ -3591,6 +3812,22 @@ static const struct bpf_func_proto * } } +static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_msg_redirect_map: + return &bpf_msg_redirect_map_proto; + case BPF_FUNC_msg_apply_bytes: + return &bpf_msg_apply_bytes_proto; + case BPF_FUNC_msg_cork_bytes: + return &bpf_msg_cork_bytes_proto; + case BPF_FUNC_msg_pull_data: + return &bpf_msg_pull_data_proto; + default: + return bpf_base_func_proto(func_id); + } +} + static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -3980,6 +4217,32 @@ static bool sk_skb_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, info); } +static bool sk_msg_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (type == BPF_WRITE) + return false; + + switch (off) { + case offsetof(struct sk_msg_md, data): + info->reg_type = PTR_TO_PACKET; + break; + case offsetof(struct sk_msg_md, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + if (off < 0 || off >= sizeof(struct sk_msg_md)) + return false; + if (off % size != 0) + return false; + if (size != sizeof(__u64)) + return false; + + return true; +} + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -4778,6 +5041,29 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + + switch 
(si->off) { + case offsetof(struct sk_msg_md, data): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, data)); + break; + case offsetof(struct sk_msg_md, data_end): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end), + si->dst_reg, si->src_reg, + offsetof(struct sk_msg_buff, data_end)); + break; + } + + return insn - insn_buf; +} + const struct bpf_verifier_ops sk_filter_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, @@ -4868,6 +5154,15 @@ const struct bpf_verifier_ops sk_skb_verifier_ops = { const struct bpf_prog_ops sk_skb_prog_ops = { }; +const struct bpf_verifier_ops sk_msg_verifier_ops = { + .get_func_proto = sk_msg_func_proto, + .is_valid_access = sk_msg_is_valid_access, + .convert_ctx_access = sk_msg_convert_ctx_access, +}; + +const struct bpf_prog_ops sk_msg_prog_ops = { +}; + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 65b51e778782..9737302907b1 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -315,12 +315,12 @@ static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; - if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops)) + if (!proc_create("dev", 0444, net->proc_net, &dev_seq_fops)) goto out; - if (!proc_create("softnet_stat", S_IRUGO, net->proc_net, + if (!proc_create("softnet_stat", 0444, net->proc_net, &softnet_seq_fops)) goto out_dev; - if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) + if (!proc_create("ptype", 0444, net->proc_net, &ptype_seq_fops)) goto out_softnet; if (wext_proc_init(net)) @@ -349,7 +349,6 @@ static void __net_exit dev_proc_net_exit(struct net *net) static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, - .async = true, }; static int dev_mc_seq_show(struct seq_file *seq, void *v) @@ -406,7 +405,6 @@ static void __net_exit dev_mc_net_exit(struct net *net) static struct pernet_operations __net_initdata dev_mc_net_ops = { .init = dev_mc_net_init, .exit = dev_mc_net_exit, - .async = true, }; int __init dev_proc_init(void) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 60a5ad2c33ee..c476f0794132 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -431,7 +431,7 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr, return netdev_store(dev, attr, buf, len, change_group); } NETDEVICE_SHOW(group, fmt_dec); -static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store); +static DEVICE_ATTR(netdev_group, 0644, group_show, group_store); static int change_proto_down(struct net_device *dev, unsigned long proto_down) { @@ -854,10 +854,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, } static struct rx_queue_attribute rps_cpus_attribute __ro_after_init - = __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); + = __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map); static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init - = __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, + = __ATTR(rps_flow_cnt, 0644, show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); #endif /* CONFIG_RPS */ @@ -1154,7 +1154,7 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue, } static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init - = __ATTR(hold_time, S_IRUGO | S_IWUSR, + = 
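sk_msg_convert_ctx_access() above rewrites loads of the stable UAPI struct sk_msg_md into loads from the kernel's private sk_msg_buff at the corresponding offsets. A loose userspace analogy of that offset translation (the real conversion emits BPF_LDX_MEM instructions at program-load time; layouts here are invented):

#include <stddef.h>
#include <stdio.h>

/* Stable user-visible layout vs. private internal layout */
struct user_md { unsigned long data, data_end; };
struct kern_buf { int pad; void *data; void *data_end; };

/* Map a user-visible field offset to the internal one. */
static size_t convert_off(size_t user_off)
{
	switch (user_off) {
	case offsetof(struct user_md, data):
		return offsetof(struct kern_buf, data);
	case offsetof(struct user_md, data_end):
		return offsetof(struct kern_buf, data_end);
	}
	return (size_t)-1;
}

int main(void)
{
	printf("%zu -> %zu\n",
	       offsetof(struct user_md, data_end),
	       convert_off(offsetof(struct user_md, data_end)));
	return 0;
}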
__ATTR(hold_time, 0644, bql_show_hold_time, bql_set_hold_time); static ssize_t bql_show_inflight(struct netdev_queue *queue, @@ -1166,7 +1166,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue, } static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init = - __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL); + __ATTR(inflight, 0444, bql_show_inflight, NULL); #define BQL_ATTR(NAME, FIELD) \ static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ @@ -1182,7 +1182,7 @@ static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ } \ \ static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \ - = __ATTR(NAME, S_IRUGO | S_IWUSR, \ + = __ATTR(NAME, 0644, \ bql_show_ ## NAME, bql_set_ ## NAME) BQL_ATTR(limit, limit); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 690e78c6af45..7fdf321d4997 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list; LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); +/* Protects net_namespace_list. Nests inside rtnl_lock() */ +DECLARE_RWSEM(net_rwsem); +EXPORT_SYMBOL_GPL(net_rwsem); + struct net init_net = { .count = REFCOUNT_INIT(1), .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), @@ -40,12 +44,13 @@ struct net init_net = { EXPORT_SYMBOL(init_net); static bool init_net_initialized; -static unsigned nr_sync_pernet_ops; /* - * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops, + * pernet_ops_rwsem: protects: pernet_list, net_generic_ids, * init_net_initialized and first_device pointer. + * This is an internal net namespace object. Please don't use it + * outside. */ -DECLARE_RWSEM(net_sem); +DECLARE_RWSEM(pernet_ops_rwsem); #define MIN_PERNET_OPS_ID \ ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) @@ -73,7 +78,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) BUG_ON(id < MIN_PERNET_OPS_ID); old_ng = rcu_dereference_protected(net->gen, - lockdep_is_held(&net_sem)); + lockdep_is_held(&pernet_ops_rwsem)); if (old_ng->s.len > id) { old_ng->ptr[id] = data; return 0; @@ -290,7 +295,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) */ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { - /* Must be called with net_sem held */ + /* Must be called with pernet_ops_rwsem held */ const struct pernet_operations *ops, *saved_ops; int error = 0; LIST_HEAD(net_exit_list); @@ -301,15 +306,16 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) net->user_ns = user_ns; idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); + mutex_init(&net->ipv4.ra_mutex); list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); if (error < 0) goto out_undo; } - rtnl_lock(); + down_write(&net_rwsem); list_add_tail_rcu(&net->list, &net_namespace_list); - rtnl_unlock(); + up_write(&net_rwsem); out: return error; @@ -338,7 +344,6 @@ static int __net_init net_defaults_init_net(struct net *net) static struct pernet_operations net_defaults_ops = { .init = net_defaults_init_net, - .async = true, }; static __init int net_defaults_init(void) @@ -405,7 +410,6 @@ struct net *copy_net_ns(unsigned long flags, { struct ucounts *ucounts; struct net *net; - unsigned write; int rv; if (!(flags & CLONE_NEWNET)) @@ -423,25 +427,14 @@ struct net *copy_net_ns(unsigned long flags, refcount_set(&net->passive, 1); net->ucounts = ucounts; get_user_ns(user_ns); -again: -
write = READ_ONCE(nr_sync_pernet_ops); - if (write) - rv = down_write_killable(&net_sem); - else - rv = down_read_killable(&net_sem); + + rv = down_read_killable(&pernet_ops_rwsem); if (rv < 0) goto put_userns; - if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) { - up_read(&net_sem); - goto again; - } rv = setup_net(net, user_ns); - if (write) - up_write(&net_sem); - else - up_read(&net_sem); + up_read(&pernet_ops_rwsem); if (rv < 0) { put_userns: @@ -461,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last) * and this work is the only process, that may delete * a net from net_namespace_list. So, when the below * is executing, the list may only grow. Thus, we do not - * use for_each_net_rcu() or rtnl_lock(). + * use for_each_net_rcu() or net_rwsem. */ for_each_net(tmp) { int id; @@ -489,24 +482,14 @@ static void cleanup_net(struct work_struct *work) struct net *net, *tmp, *last; struct llist_node *net_kill_list; LIST_HEAD(net_exit_list); - unsigned write; /* Atomically snapshot the list of namespaces to cleanup */ net_kill_list = llist_del_all(&cleanup_list); -again: - write = READ_ONCE(nr_sync_pernet_ops); - if (write) - down_write(&net_sem); - else - down_read(&net_sem); - if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) { - up_read(&net_sem); - goto again; - } + down_read(&pernet_ops_rwsem); /* Don't let anyone else find us. */ - rtnl_lock(); + down_write(&net_rwsem); llist_for_each_entry(net, net_kill_list, cleanup_list) list_del_rcu(&net->list); /* Cache last net. After we unlock rtnl, no one new net @@ -520,7 +503,7 @@ again: * useless anyway, as netns_ids are destroyed there. */ last = list_last_entry(&net_namespace_list, struct net, list); - rtnl_unlock(); + up_write(&net_rwsem); llist_for_each_entry(net, net_kill_list, cleanup_list) { unhash_nsid(net, last); @@ -542,10 +525,7 @@ again: list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list); - if (write) - up_write(&net_sem); - else - up_read(&net_sem); + up_read(&pernet_ops_rwsem); /* Ensure there are no outstanding rcu callbacks using this * network namespace. @@ -572,8 +552,8 @@ again: */ void net_ns_barrier(void) { - down_write(&net_sem); - up_write(&net_sem); + down_write(&pernet_ops_rwsem); + up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL(net_ns_barrier); @@ -653,7 +633,6 @@ static __net_exit void net_ns_net_exit(struct net *net) static struct pernet_operations __net_initdata net_ns_ops = { .init = net_ns_net_init, .exit = net_ns_net_exit, - .async = true, }; static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { @@ -882,7 +861,7 @@ static int __init net_ns_init(void) #ifdef CONFIG_NET_NS net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, - SLAB_PANIC, NULL); + SLAB_PANIC|SLAB_ACCOUNT, NULL); /* Create workqueue for cleanup */ netns_wq = create_singlethread_workqueue("netns"); @@ -896,12 +875,12 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); - down_write(&net_sem); + down_write(&pernet_ops_rwsem); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); init_net_initialized = true; - up_write(&net_sem); + up_write(&pernet_ops_rwsem); register_pernet_subsys(&net_ns_ops); @@ -925,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list, list_add_tail(&ops->list, list); if (ops->init || (ops->id && ops->size)) { + /* We hold pernet_ops_rwsem write-locked, so parallel + * setup_net() and cleanup_net() are not possible.
+ */ for_each_net(net) { error = ops_init(ops, net); if (error) @@ -948,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) LIST_HEAD(net_exit_list); list_del(&ops->list); + /* See comment in __register_pernet_operations() */ for_each_net(net) list_add_tail(&net->exit_list, &net_exit_list); ops_exit_list(ops, &net_exit_list); @@ -1005,9 +988,6 @@ again: rcu_barrier(); if (ops->id) ida_remove(&net_generic_ids, *ops->id); - } else if (!ops->async) { - pr_info_once("Pernet operations %ps are sync.\n", ops); - nr_sync_pernet_ops++; } return error; @@ -1015,8 +995,6 @@ again: static void unregister_pernet_operations(struct pernet_operations *ops) { - if (!ops->async) - BUG_ON(nr_sync_pernet_ops-- == 0); __unregister_pernet_operations(ops); rcu_barrier(); if (ops->id) @@ -1045,9 +1023,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops) int register_pernet_subsys(struct pernet_operations *ops) { int error; - down_write(&net_sem); + down_write(&pernet_ops_rwsem); error = register_pernet_operations(first_device, ops); - up_write(&net_sem); + up_write(&pernet_ops_rwsem); return error; } EXPORT_SYMBOL_GPL(register_pernet_subsys); @@ -1063,9 +1041,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); */ void unregister_pernet_subsys(struct pernet_operations *ops) { - down_write(&net_sem); + down_write(&pernet_ops_rwsem); unregister_pernet_operations(ops); - up_write(&net_sem); + up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); @@ -1091,11 +1069,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys); int register_pernet_device(struct pernet_operations *ops) { int error; - down_write(&net_sem); + down_write(&pernet_ops_rwsem); error = register_pernet_operations(&pernet_list, ops); if (!error && (first_device == &pernet_list)) first_device = &ops->list; - up_write(&net_sem); + up_write(&pernet_ops_rwsem); return error; } EXPORT_SYMBOL_GPL(register_pernet_device); @@ -1111,11 +1089,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device); */ void unregister_pernet_device(struct pernet_operations *ops) { - down_write(&net_sem); + down_write(&pernet_ops_rwsem); if (&ops->list == first_device) first_device = first_device->next; unregister_pernet_operations(ops); - up_write(&net_sem); + up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL_GPL(unregister_pernet_device); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b8ab5c829511..7e4ede34cc52 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file, i += len; if (debug) { - size_t copy = min_t(size_t, count, 1023); - char tb[copy + 1]; - if (copy_from_user(tb, user_buffer, copy)) - return -EFAULT; - tb[copy] = 0; - pr_debug("%s,%lu buffer -:%s:-\n", - name, (unsigned long)count, tb); + size_t copy = min_t(size_t, count + 1, 1024); + char *tp = strndup_user(user_buffer, copy); + + if (IS_ERR(tp)) + return PTR_ERR(tp); + + pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp); + kfree(tp); } if (!strcmp(name, "min_pkt_size")) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 67f375cfb982..e86b28482ca7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -75,6 +75,12 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +int rtnl_lock_killable(void) +{ + return mutex_lock_killable(&rtnl_mutex); +} +EXPORT_SYMBOL(rtnl_lock_killable); + static struct sk_buff *defer_kfree_skb_list; void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail) { @@ -412,9 +418,11 @@ void __rtnl_link_unregister(struct 
rtnl_link_ops *ops) { struct net *net; + down_read(&net_rwsem); for_each_net(net) { __rtnl_kill_links(net, ops); } + up_read(&net_rwsem); list_del(&ops->list); } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); @@ -432,6 +440,9 @@ static void rtnl_lock_unregistering_all(void) for (;;) { unregistering = false; rtnl_lock(); + /* We hold pernet_ops_rwsem write-locked, so parallel + * setup_net() and cleanup_net() are not possible. + */ for_each_net(net) { if (net->dev_unreg_count > 0) { unregistering = true; @@ -453,12 +464,12 @@ static void rtnl_lock_unregistering_all(void) */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { - /* Close the race with cleanup_net() */ - down_write(&net_sem); + /* Close the race with setup_net() and cleanup_net() */ + down_write(&pernet_ops_rwsem); rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); - up_write(&net_sem); + up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); @@ -4724,7 +4735,6 @@ static void __net_exit rtnetlink_net_exit(struct net *net) static struct pernet_operations rtnetlink_net_ops = { .init = rtnetlink_net_init, .exit = rtnetlink_net_exit, - .async = true, }; void __init rtnetlink_init(void) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 96d36b81a3a5..b5c75d4fcf37 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3460,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) } EXPORT_SYMBOL_GPL(skb_pull_rcsum); +static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) +{ + skb_frag_t head_frag; + struct page *page; + + page = virt_to_head_page(frag_skb->head); + head_frag.page.p = page; + head_frag.page_offset = frag_skb->data - + (unsigned char *)page_address(page); + head_frag.size = skb_headlen(frag_skb); + return head_frag; +} + /** * skb_segment - Perform protocol segmentation on skb. * @head_skb: buffer to segment @@ -3664,15 +3677,19 @@ normal: while (pos < offset + len) { if (i >= nfrags) { - BUG_ON(skb_headlen(list_skb)); - i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; + if (!skb_headlen(list_skb)) { + BUG_ON(!nfrags); + } else { + BUG_ON(!list_skb->head_frag); - BUG_ON(!nfrags); - + /* to make room for head_frag. */ + i--; + frag--; + } if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) @@ -3689,7 +3706,7 @@ normal: goto err; } - *nskb_frag = *frag; + *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); @@ -4181,7 +4198,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + sk->sk_error_report(sk); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); @@ -4893,7 +4910,7 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet); * * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
*/ -unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) +static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); unsigned int thlen = 0; @@ -4906,7 +4923,7 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) thlen += inner_tcp_hdrlen(skb); } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { thlen = tcp_hdrlen(skb); - } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) { + } else if (unlikely(skb_is_gso_sctp(skb))) { thlen = sizeof(struct sctphdr); } /* UFO sets gso_size to the size of the fragmentation @@ -4915,7 +4932,40 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) */ return thlen + shinfo->gso_size; } -EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); + +/** + * skb_gso_network_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_network_seglen is used to determine the real size of the + * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). + * + * The MAC/L2 header is not accounted for. + */ +static unsigned int skb_gso_network_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - + skb_network_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} + +/** + * skb_gso_mac_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_mac_seglen is used to determine the real size of the + * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 + * headers (TCP/UDP). + */ +static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} /** * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS @@ -4957,19 +5007,20 @@ static inline bool skb_gso_size_check(const struct sk_buff *skb, } /** - * skb_gso_validate_mtu - Return in case such skb fits a given MTU + * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU? * * @skb: GSO skb * @mtu: MTU to validate against * - * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU - * once split. + * skb_gso_validate_network_len validates if a given skb will fit a + * wanted MTU once split. It considers L3 headers, L4 headers, and the + * payload. */ -bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu) +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu) { return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); } -EXPORT_SYMBOL_GPL(skb_gso_validate_mtu); +EXPORT_SYMBOL_GPL(skb_gso_validate_network_len); /** * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? 
@@ -4988,13 +5039,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { + int mac_len; + if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN, - 2 * ETH_ALEN); + mac_len = skb->data - skb_mac_header(skb); + memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), + mac_len - VLAN_HLEN - ETH_TLEN); skb->mac_header += VLAN_HLEN; return skb; } diff --git a/net/core/sock.c b/net/core/sock.c index 507d8c6c4319..6444525f610c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1052,8 +1052,6 @@ set_rcvbuf: if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { if (sk->sk_protocol != IPPROTO_TCP) ret = -ENOTSUPP; - else if (sk->sk_state != TCP_CLOSE) - ret = -EBUSY; } else if (sk->sk_family != PF_RDS) { ret = -ENOTSUPP; } @@ -1062,8 +1060,9 @@ set_rcvbuf: ret = -EINVAL; else sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); - break; } + break; + default: ret = -ENOPROTOOPT; break; @@ -2238,6 +2237,67 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) } EXPORT_SYMBOL(sk_page_frag_refill); +int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, + int sg_start, int *sg_curr_index, unsigned int *sg_curr_size, + int first_coalesce) +{ + int sg_curr = *sg_curr_index, use = 0, rc = 0; + unsigned int size = *sg_curr_size; + struct page_frag *pfrag; + struct scatterlist *sge; + + len -= size; + pfrag = sk_page_frag(sk); + + while (len > 0) { + unsigned int orig_offset; + + if (!sk_page_frag_refill(sk, pfrag)) { + rc = -ENOMEM; + goto out; + } + + use = min_t(int, len, pfrag->size - pfrag->offset); + + if (!sk_wmem_schedule(sk, use)) { + rc = -ENOMEM; + goto out; + } + + sk_mem_charge(sk, use); + size += use; + orig_offset = pfrag->offset; + pfrag->offset += use; + + sge = sg + sg_curr - 1; + if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page && + sg->offset + sg->length == orig_offset) { + sg->length += use; + } else { + sge = sg + sg_curr; + sg_unmark_end(sge); + sg_set_page(sge, pfrag->page, use, orig_offset); + get_page(pfrag->page); + sg_curr++; + + if (sg_curr == MAX_SKB_FRAGS) + sg_curr = 0; + + if (sg_curr == sg_start) { + rc = -ENOSPC; + break; + } + } + + len -= use; + } +out: + *sg_curr_size = size; + *sg_curr_index = sg_curr; + return rc; +} +EXPORT_SYMBOL(sk_alloc_sg); + static void __lock_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) @@ -3115,7 +3175,6 @@ static void __net_exit sock_inuse_exit_net(struct net *net) static struct pernet_operations net_inuse_ops = { .init = sock_inuse_init_net, .exit = sock_inuse_exit_net, - .async = true, }; static __init int net_inuse_init(void) @@ -3266,6 +3325,27 @@ void proto_unregister(struct proto *prot) } EXPORT_SYMBOL(proto_unregister); +int sock_load_diag_module(int family, int protocol) +{ + if (!protocol) { + if (!sock_is_registered(family)) + return -ENOENT; + + return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, family); + } + +#ifdef CONFIG_INET + if (family == AF_INET && + !rcu_access_pointer(inet_protos[protocol])) + return -ENOENT; +#endif + + return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, family, protocol); +} +EXPORT_SYMBOL(sock_load_diag_module); + #ifdef CONFIG_PROC_FS static void *proto_seq_start(struct seq_file *seq, loff_t *pos) __acquires(proto_list_mutex) @@ -3374,7 +3454,7 @@ static const struct file_operations 
proto_seq_fops = { static __net_init int proto_init_net(struct net *net) { - if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops)) + if (!proc_create("protocols", 0444, net->proc_net, &proto_seq_fops)) return -ENOMEM; return 0; @@ -3389,7 +3469,6 @@ static __net_exit void proto_exit_net(struct net *net) static __net_initdata struct pernet_operations proto_net_ops = { .init = proto_init_net, .exit = proto_exit_net, - .async = true, }; static int __init proto_init(void) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index aee5642affd9..c37b5be7c5e4 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -220,8 +220,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; if (sock_diag_handlers[req->sdiag_family] == NULL) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, req->sdiag_family); + sock_load_diag_module(req->sdiag_family, 0); mutex_lock(&sock_diag_table_mutex); hndl = sock_diag_handlers[req->sdiag_family]; @@ -247,8 +246,7 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, case TCPDIAG_GETSOCK: case DCCPDIAG_GETSOCK: if (inet_rcv_compat == NULL) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + sock_load_diag_module(AF_INET, 0); mutex_lock(&sock_diag_table_mutex); if (inet_rcv_compat != NULL) @@ -281,14 +279,12 @@ static int sock_diag_bind(struct net *net, int group) case SKNLGRP_INET_TCP_DESTROY: case SKNLGRP_INET_UDP_DESTROY: if (!sock_diag_handlers[AF_INET]) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + sock_load_diag_module(AF_INET, 0); break; case SKNLGRP_INET6_TCP_DESTROY: case SKNLGRP_INET6_UDP_DESTROY: if (!sock_diag_handlers[AF_INET6]) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET6); + sock_load_diag_module(AF_INET6, 0); break; } return 0; @@ -328,7 +324,6 @@ static void __net_exit diag_net_exit(struct net *net) static struct pernet_operations diag_net_ops = { .init = diag_net_init, .exit = diag_net_exit, - .async = true, }; static int __init sock_diag_init(void) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d714f65782b7..b3b609f0eeb5 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS; static int net_msg_warn; /* Unused, but still a sysctl */ +int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; +EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &zero, }, + { + .procname = "fb_tunnels_only_for_init_net", + .data = &sysctl_fb_tunnels_only_for_init_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, { } }; @@ -572,7 +584,6 @@ static __net_exit void sysctl_core_net_exit(struct net *net) static __net_initdata struct pernet_operations sysctl_core_ops = { .init = sysctl_core_net_init, .exit = sysctl_core_net_exit, - .async = true, }; static __init int sysctl_core_init(void) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 13ad28ab1e79..e65fcb45c3f6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1031,7 +1031,6 @@ static struct pernet_operations dccp_v4_ops = { .init = 
dccp_v4_init_net, .exit = dccp_v4_exit_net, .exit_batch = dccp_v4_exit_batch, - .async = true, }; static int __init dccp_v4_init(void) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2f48c020f8c3..5df7857fc0f3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1116,7 +1116,6 @@ static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, .exit_batch = dccp_v6_exit_batch, - .async = true, }; static int __init dccp_v6_init(void) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 15bdc002d90c..84cd4e3fd01b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -794,6 +794,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (skb == NULL) goto out_release; + if (sk->sk_state == DCCP_CLOSED) { + rc = -ENOTCONN; + goto out_discard; + } + skb_reserve(skb, sk->sk_prot->max_header); rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc != 0) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2ee8306c23e3..32751602767f 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2383,7 +2383,7 @@ static int __init decnet_init(void) dev_add_pack(&dn_dix_packet_type); register_netdevice_notifier(&dn_dev_notifier); - proc_create("decnet", S_IRUGO, init_net.proc_net, &dn_socket_seq_fops); + proc_create("decnet", 0444, init_net.proc_net, &dn_socket_seq_fops); dn_register_sysctl(); out: return rc; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index c9f5e1ebb9c8..c03b046478c3 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1424,7 +1424,7 @@ void __init dn_dev_init(void) rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0); - proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops); + proc_create("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_fops); #ifdef CONFIG_SYSCTL { diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 6e37d9e6345e..13156165afa3 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -608,7 +608,7 @@ static const struct file_operations dn_neigh_seq_fops = { void __init dn_neigh_init(void) { neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table); - proc_create("decnet_neigh", S_IRUGO, init_net.proc_net, + proc_create("decnet_neigh", 0444, init_net.proc_net, &dn_neigh_seq_fops); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index ef20b8e31669..eca0cc6b761f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1918,7 +1918,7 @@ void __init dn_route_init(void) dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_create("decnet_cache", S_IRUGO, init_net.proc_net, + proc_create("decnet_cache", 0444, init_net.proc_net, &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index e1d4d898a007..8396705deffc 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -38,7 +38,7 @@ MODULE_AUTHOR("Wang Lei"); MODULE_LICENSE("GPL"); unsigned int dns_resolver_debug; -module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO); +module_param_named(debug, dns_resolver_debug, uint, 0644); MODULE_PARM_DESC(debug, "DNS Resolver debugging mask"); const struct cred *dns_resolver_cache; diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index cb54b81d0bd9..42a7b85b84e1 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -194,7 +194,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, ds->ports[i].dn = cd->port_dn[i]; ds->ports[i].cpu_dp = dst->cpu_dp; - if (dsa_is_user_port(ds, i)) + if 
(!dsa_is_user_port(ds, i)) continue; ret = dsa_slave_create(&ds->ports[i]); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index e4f305320519..275449b0d633 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -207,9 +207,13 @@ static inline void lowpan_netlink_fini(void) static int lowpan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *wdev = netdev_notifier_info_to_dev(ptr); + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct wpan_dev *wpan_dev; - if (wdev->type != ARPHRD_IEEE802154) + if (ndev->type != ARPHRD_IEEE802154) + return NOTIFY_DONE; + wpan_dev = ndev->ieee802154_ptr; + if (!wpan_dev) return NOTIFY_DONE; switch (event) { @@ -218,8 +222,8 @@ static int lowpan_device_event(struct notifier_block *unused, * also delete possible lowpan interfaces which belongs * to the wpan interface. */ - if (wdev->ieee802154_ptr->lowpan_dev) - lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL); + if (wpan_dev->lowpan_dev) + lowpan_dellink(wpan_dev->lowpan_dev, NULL); break; default: return NOTIFY_DONE; diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index 9104943c15ba..cb7176cd4cd6 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -345,7 +345,6 @@ static void __net_exit cfg802154_pernet_exit(struct net *net) static struct pernet_operations cfg802154_pernet_ops = { .exit = cfg802154_pernet_exit, - .async = true, }; static int __init wpan_phy_class_init(void) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e8c7fad8c329..f98e2f0db841 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1735,7 +1735,6 @@ static __net_exit void ipv4_mib_exit_net(struct net *net) static __net_initdata struct pernet_operations ipv4_mib_ops = { .init = ipv4_mib_init_net, .exit = ipv4_mib_exit_net, - .async = true, }; static int __init init_ipv4_mibs(void) @@ -1789,7 +1788,6 @@ static __net_exit void inet_exit_net(struct net *net) static __net_initdata struct pernet_operations af_inet_ops = { .init = inet_init_net, .exit = inet_exit_net, - .async = true, }; static int __init init_inet_pernet_ops(void) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7dc9de8444a9..be4c595edccb 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1434,7 +1434,7 @@ static const struct file_operations arp_seq_fops = { static int __net_init arp_net_init(struct net *net) { - if (!proc_create("arp", S_IRUGO, net->proc_net, &arp_seq_fops)) + if (!proc_create("arp", 0444, net->proc_net, &arp_seq_fops)) return -ENOMEM; return 0; } @@ -1447,7 +1447,6 @@ static void __net_exit arp_net_exit(struct net *net) static struct pernet_operations arp_net_ops = { .init = arp_net_init, .exit = arp_net_exit, - .async = true, }; static int __init arp_proc_init(void) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5ae0d1f097ca..40f001782c1b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2469,7 +2469,6 @@ static __net_exit void devinet_exit_net(struct net *net) static __net_initdata struct pernet_operations devinet_ops = { .init = devinet_init_net, .exit = devinet_exit_net, - .async = true, }; static struct rtnl_af_ops inet_af_ops __read_mostly = { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 296d0b956bfe..97689012b357 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -654,7 +654,7 @@ static void esp_input_restore_header(struct sk_buff *skb) static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) { struct xfrm_state *x = 
xfrm_input_state(skb); - struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data; + struct ip_esp_hdr *esph; /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. We will move it back after diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index da5635fc52c2..7cf755ef9efb 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -138,6 +138,8 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || (x->xso.dev != skb->dev)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) + esp_features = features & ~NETIF_F_CSUM_MASK; xo->flags |= XFRM_GSO_SEGMENT; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ac71c3d496c0..f05afaf3235c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1362,7 +1362,6 @@ static void __net_exit fib_net_exit(struct net *net) static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, - .async = true, }; void __init ip_fib_init(void) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 62243a8abf92..3dcffd3ce98c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1065,6 +1065,9 @@ noleaf: return -ENOMEM; } +/* fib notifier for ADD is sent before calling fib_insert_alias with + * the expectation that the only possible failure is ENOMEM + */ static int fib_insert_alias(struct trie *t, struct key_vector *tp, struct key_vector *l, struct fib_alias *new, struct fib_alias *fa, t_key key) @@ -1216,8 +1219,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, - key, plen, new_fa, extack); + err = call_fib_entry_notifiers(net, + FIB_EVENT_ENTRY_REPLACE, + key, plen, new_fa, + extack); + if (err) + goto out_free_new_fa; + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1263,21 +1271,32 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; + err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack); + if (err) + goto out_free_new_fa; + /* Insert new entry to the list. */ err = fib_insert_alias(t, tp, l, new_fa, fa, key); if (err) - goto out_free_new_fa; + goto out_fib_notif; if (!plen) tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, event, key, plen, new_fa, extack); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: return 0; +out_fib_notif: + /* notifier was sent that entry would be added to trie, but + * the add failed and we need to recover. Only failure for + * fib_insert_alias is ENOMEM.
+ */ + NL_SET_ERR_MSG(extack, "Failed to insert route into trie"); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, + plen, new_fa, NULL); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -2722,14 +2741,14 @@ static const struct file_operations fib_route_fops = { int __net_init fib_proc_init(struct net *net) { - if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) + if (!proc_create("fib_trie", 0444, net->proc_net, &fib_trie_fops)) goto out1; - if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, + if (!proc_create("fib_triestat", 0444, net->proc_net, &fib_triestat_fops)) goto out2; - if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) + if (!proc_create("route", 0444, net->proc_net, &fib_route_fops)) goto out3; return 0; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index d3e1a9af478b..1540db65241a 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -1081,7 +1081,6 @@ static struct pernet_operations fou_net_ops = { .exit = fou_exit_net, .id = &fou_net_id, .size = sizeof(struct fou_net), - .async = true, }; static int __init fou_init(void) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index cc56efa64d5c..1617604c9284 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1257,7 +1257,6 @@ fail: static struct pernet_operations __net_initdata icmp_sk_ops = { .init = icmp_sk_init, .exit = icmp_sk_exit, - .async = true, }; int __init icmp_init(void) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c2743763777e..b26a81a7de42 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2993,10 +2993,10 @@ static int __net_init igmp_net_init(struct net *net) struct proc_dir_entry *pde; int err; - pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); + pde = proc_create("igmp", 0444, net->proc_net, &igmp_mc_seq_fops); if (!pde) goto out_igmp; - pde = proc_create("mcfilter", S_IRUGO, net->proc_net, + pde = proc_create("mcfilter", 0444, net->proc_net, &igmp_mcf_seq_fops); if (!pde) goto out_mcfilter; @@ -3028,7 +3028,6 @@ static void __net_exit igmp_net_exit(struct net *net) static struct pernet_operations igmp_net_ops = { .init = igmp_net_init, .exit = igmp_net_exit, - .async = true, }; #endif diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a383f299ce24..4e5bc4b2f14e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -53,8 +53,7 @@ static DEFINE_MUTEX(inet_diag_table_mutex); static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { if (!inet_diag_table[proto]) - request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET, proto); + sock_load_diag_module(AF_INET, proto); mutex_lock(&inet_diag_table_mutex); if (!inet_diag_table[proto]) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 26a3d0315728..e8ec28999f5c 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -119,6 +119,9 @@ out: static bool inet_fragq_should_evict(const struct inet_frag_queue *q) { + if (!hlist_unhashed(&q->list_evictor)) + return false; + return q->net->low_thresh == 0 || frag_mem_limit(q->net) >= q->net->low_thresh; } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 2dd21c3281a1..b54b948b0596 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -55,7 +55,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) if (skb->ignore_df) return false; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; 
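The skb_gso_validate_mtu() check above becomes skb_gso_validate_network_len(), matching the helpers added to net/core/skbuff.c earlier in this series: a GSO packet's real per-segment size is its L3 plus L4 header length plus gso_size, and ip_exceeds_mtu() lets a GSO skb pass unfragmented only when that per-segment size fits the egress MTU. The following standalone sketch shows the arithmetic; the toy_* names and the field values are illustrative assumptions, not the real struct sk_buff layout.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for the skb fields the seglen helpers read; offsets are
 * byte offsets from the start of the frame, purely for illustration. */
struct toy_gso_skb {
	unsigned int network_off;   /* offset of the L3 (IP) header */
	unsigned int transport_off; /* offset of the L4 (TCP/UDP) header */
	unsigned int thlen;         /* L4 header length, e.g. tcp_hdrlen() */
	unsigned int gso_size;      /* payload bytes carried per segment */
};

/* Mirrors skb_gso_network_seglen(): L3+L4 headers plus per-segment payload. */
static unsigned int toy_network_seglen(const struct toy_gso_skb *skb)
{
	unsigned int hdr_len = skb->transport_off - skb->network_off;

	return hdr_len + skb->thlen + skb->gso_size;
}

/* Mirrors the common (non-GSO_BY_FRAGS) branch of skb_gso_size_check():
 * every segment produced by segmentation must fit the given MTU. */
static bool toy_validate_network_len(const struct toy_gso_skb *skb,
				     unsigned int mtu)
{
	return toy_network_seglen(skb) <= mtu;
}

int main(void)
{
	/* 14-byte Ethernet, 20-byte IPv4, 20-byte TCP, 1448-byte segments */
	struct toy_gso_skb skb = { 14, 34, 20, 1448 };

	printf("seglen=%u, fits MTU 1500: %d\n",
	       toy_network_seglen(&skb),
	       toy_validate_network_len(&skb, 1500));
	return 0;
}

The L2 header is deliberately excluded from this check, which is why the function now says "network_len"; skb_gso_validate_mac_len(), kept alongside it, is the variant that counts the MAC header for callers validating against a link-layer limit.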
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 5e843ae5e468..bbf1b94942c0 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -885,7 +885,6 @@ static void __net_exit ipv4_frags_exit_net(struct net *net) static struct pernet_operations ip4_frags_ops = { .init = ipv4_frags_init_net, .exit = ipv4_frags_exit_net, - .async = true, }; void __init ipfrag_init(void) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 95fd225f402e..a8772a978224 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -550,7 +550,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); gre_build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key32(tun_info->key.tun_id), - (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -973,9 +973,6 @@ static void __gre_tunnel_init(struct net_device *dev) t_hlen = tunnel->hlen + sizeof(struct iphdr); - dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; - dev->mtu = ETH_DATA_LEN - t_hlen - 4; - dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; @@ -1047,7 +1044,6 @@ static struct pernet_operations ipgre_net_ops = { .exit_batch = ipgre_exit_batch_net, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), - .async = true, }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], @@ -1294,8 +1290,6 @@ static int erspan_tunnel_init(struct net_device *dev) erspan_hdr_len(tunnel->erspan_ver); t_hlen = tunnel->hlen + sizeof(struct iphdr); - dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; - dev->mtu = ETH_DATA_LEN - t_hlen - 4; dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -1633,7 +1627,6 @@ static struct pernet_operations ipgre_tap_net_ops = { .exit_batch = ipgre_tap_exit_batch_net, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), - .async = true, }; static int __net_init erspan_init_net(struct net *net) @@ -1652,7 +1645,6 @@ static struct pernet_operations erspan_net_ops = { .exit_batch = erspan_exit_batch_net, .id = &erspan_net_id, .size = sizeof(struct ip_tunnel_net), - .async = true, }; static int __init ipgre_init(void) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 57fc13c6ab2b..7582713dd18f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) struct net_device *dev = skb->dev; struct net *net = dev_net(dev); - for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { + for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report @@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) */ if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == dev->ifindex) && - net_eq(sock_net(sk), net)) { + sk->sk_bound_dev_if == dev->ifindex)) { if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN)) return true; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e8e675be60ec..66340ab750e6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -248,7 +248,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, /* common case: seglen is <= mtu */ - if (skb_gso_validate_mtu(skb, mtu)) + if (skb_gso_validate_network_len(skb, mtu)) return 
ip_finish_output2(net, sk, skb); /* Slowpath - GSO segment length exceeds the egress MTU. diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 74c962b9b09c..5ad2d8ed3a3f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -322,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, return 0; } - -/* Special input handler for packets caught by router alert option. - They are selected only by protocol field, and then processed likely - local ones; but only if someone wants them! Otherwise, router - not running rsvpd will kill RSVP. - - It is user level problem, what it will make with them. - I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), - but receiver should be enough clever f.e. to forward mtrace requests, - sent to multicast group to reach destination designated router. - */ -struct ip_ra_chain __rcu *ip_ra_chain; - - static void ip_ra_destroy_rcu(struct rcu_head *head) { struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); @@ -349,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on, { struct ip_ra_chain *ra, *new_ra; struct ip_ra_chain __rcu **rap; + struct net *net = sock_net(sk); if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) return -EINVAL; new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; - for (rap = &ip_ra_chain; - (ra = rtnl_dereference(*rap)) != NULL; + mutex_lock(&net->ipv4.ra_mutex); + for (rap = &net->ipv4.ra_chain; + (ra = rcu_dereference_protected(*rap, + lockdep_is_held(&net->ipv4.ra_mutex))) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { + mutex_unlock(&net->ipv4.ra_mutex); kfree(new_ra); return -EADDRINUSE; } /* dont let ip_call_ra_chain() use sk again */ ra->sk = NULL; RCU_INIT_POINTER(*rap, ra->next); + mutex_unlock(&net->ipv4.ra_mutex); if (ra->destructor) ra->destructor(sk); @@ -379,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on, return 0; } } - if (!new_ra) + if (!new_ra) { + mutex_unlock(&net->ipv4.ra_mutex); return -ENOBUFS; + } new_ra->sk = sk; new_ra->destructor = destructor; RCU_INIT_POINTER(new_ra->next, ra); rcu_assign_pointer(*rap, new_ra); sock_hold(sk); + mutex_unlock(&net->ipv4.ra_mutex); return 0; } @@ -586,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname) case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_UNBLOCK_SOURCE: - case IP_ROUTER_ALERT: return true; } return false; @@ -639,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* If optlen==0, it is equivalent to val == 0 */ + if (optname == IP_ROUTER_ALERT) + return ip_ra_control(sk, val ? 1 : 0, NULL); if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk, optname, optval, optlen); @@ -1149,9 +1144,6 @@ mc_msf_out: goto e_inval; inet->mc_all = val; break; - case IP_ROUTER_ALERT: - err = ip_ra_control(sk, val ? 
1 : 0, NULL); - break; case IP_FREEBIND: if (optlen < 1) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index b2117d89bc83..5fcb17cb426b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -347,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, struct net_device *dev; int t_hlen; - BUG_ON(!itn->fb_tunnel_dev); - dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); + dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms); if (IS_ERR(dev)) return ERR_CAST(dev); @@ -694,16 +693,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - if (tunnel->fwmark) { - ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), - tunnel->parms.link, tunnel->fwmark); - } - else { - ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), - tunnel->parms.link, skb->mark); - } + ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + tunnel->fwmark); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; @@ -829,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) struct net *net = t->net; struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); - BUG_ON(!itn->fb_tunnel_dev); switch (cmd) { case SIOCGETTUNNEL: if (dev == itn->fb_tunnel_dev) { @@ -854,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) p->o_key = 0; } - t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + t = ip_tunnel_find(itn, p, itn->type); if (cmd == SIOCADDTUNNEL) { if (!t) { @@ -998,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct ip_tunnel_parm parms; unsigned int i; + itn->rtnl_link_ops = ops; for (i = 0; i < IP_TNL_HASH_SIZE; i++) INIT_HLIST_HEAD(&itn->tunnels[i]); - if (!ops) { + if (!ops || !net_has_fallback_tunnels(net)) { + struct ip_tunnel_net *it_init_net; + + it_init_net = net_generic(&init_net, ip_tnl_net_id); + itn->type = it_init_net->type; itn->fb_tunnel_dev = NULL; return 0; } @@ -1019,6 +1015,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); + itn->type = itn->fb_tunnel_dev->type; } rtnl_unlock(); @@ -1026,10 +1023,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); -static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, +static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn, + struct list_head *head, struct rtnl_link_ops *ops) { - struct net *net = dev_net(itn->fb_tunnel_dev); struct net_device *dev, *aux; int h; @@ -1061,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { itn = net_generic(net, id); - ip_tunnel_destroy(itn, &list, ops); + ip_tunnel_destroy(net, itn, &list, ops); } unregister_netdevice_many(&list); rtnl_unlock(); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index b10bf563afd9..51b1669334fe 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -454,7 +454,6 @@ static struct pernet_operations vti_net_ops = { .exit_batch = vti_exit_batch_net, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), - .async = true, }; static int 
vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f75802ad960f..43f620feb1c4 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1369,7 +1369,7 @@ static int __init ip_auto_config(void) unsigned int i; #ifdef CONFIG_PROC_FS - proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops); + proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9c5a4d164f09..c891235b4966 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -669,7 +669,6 @@ static struct pernet_operations ipip_net_ops = { .exit_batch = ipip_exit_batch_net, .id = &ipip_net_id, .size = sizeof(struct ip_tunnel_net), - .async = true, }; static int __init ipip_init(void) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d752a70855d8..2fb4de3f7f66 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -644,80 +644,22 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) } #endif -static int call_ipmr_vif_entry_notifier(struct notifier_block *nb, - struct net *net, - enum fib_event_type event_type, - struct vif_device *vif, - vifi_t vif_index, u32 tb_id) -{ - struct vif_entry_notifier_info info = { - .info = { - .family = RTNL_FAMILY_IPMR, - .net = net, - }, - .dev = vif->dev, - .vif_index = vif_index, - .vif_flags = vif->flags, - .tb_id = tb_id, - }; - - return call_fib_notifier(nb, net, event_type, &info.info); -} - static int call_ipmr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, vifi_t vif_index, u32 tb_id) { - struct vif_entry_notifier_info info = { - .info = { - .family = RTNL_FAMILY_IPMR, - .net = net, - }, - .dev = vif->dev, - .vif_index = vif_index, - .vif_flags = vif->flags, - .tb_id = tb_id, - }; - - ASSERT_RTNL(); - net->ipv4.ipmr_seq++; - return call_fib_notifiers(net, event_type, &info.info); -} - -static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, - struct net *net, - enum fib_event_type event_type, - struct mfc_cache *mfc, u32 tb_id) -{ - struct mfc_entry_notifier_info info = { - .info = { - .family = RTNL_FAMILY_IPMR, - .net = net, - }, - .mfc = mfc, - .tb_id = tb_id - }; - - return call_fib_notifier(nb, net, event_type, &info.info); + return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, + vif, vif_index, tb_id, + &net->ipv4.ipmr_seq); } static int call_ipmr_mfc_entry_notifiers(struct net *net, enum fib_event_type event_type, struct mfc_cache *mfc, u32 tb_id) { - struct mfc_entry_notifier_info info = { - .info = { - .family = RTNL_FAMILY_IPMR, - .net = net, - }, - .mfc = mfc, - .tb_id = tb_id - }; - - ASSERT_RTNL(); - net->ipv4.ipmr_seq++; - return call_fib_notifiers(net, event_type, &info.info); + return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type, + &mfc->_c, tb_id, &net->ipv4.ipmr_seq); } /** @@ -790,11 +732,10 @@ static void ipmr_cache_free_rcu(struct rcu_head *head) kmem_cache_free(mrt_cachep, (struct mfc_cache *)c); } -void ipmr_cache_free(struct mfc_cache *c) +static void ipmr_cache_free(struct mfc_cache *c) { call_rcu(&c->_c.rcu, ipmr_cache_free_rcu); } -EXPORT_SYMBOL(ipmr_cache_free); /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. 
@@ -1045,6 +986,7 @@ static struct mfc_cache *ipmr_cache_alloc(void) if (c) { c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->_c.mfc_un.res.minvif = MAXVIFS; + c->_c.free = ipmr_cache_free_rcu; refcount_set(&c->_c.mfc_un.res.refcount, 1); } return c; @@ -1264,7 +1206,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) list_del_rcu(&c->_c.list); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_put(c); + mr_cache_put(&c->_c); return 0; } @@ -1376,7 +1318,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, mrt->id); mroute_netlink_event(mrt, cache, RTM_DELROUTE); - ipmr_cache_put(cache); + mr_cache_put(c); } if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { @@ -1399,7 +1341,7 @@ static void mrtsock_destruct(struct sock *sk) struct net *net = sock_net(sk); struct mr_table *mrt; - ASSERT_RTNL(); + rtnl_lock(); ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; @@ -1411,6 +1353,7 @@ static void mrtsock_destruct(struct sock *sk) mroute_clean_tables(mrt, false); } } + rtnl_unlock(); } /* Socket options and virtual interface manipulation. The whole @@ -1475,8 +1418,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, if (sk != rcu_access_pointer(mrt->mroute_sk)) { ret = -EACCES; } else { + /* We need to unlock here because mrtsock_destruct takes + * care of rtnl itself and we can't change that due to + * the IP_ROUTER_ALERT setsockopt which runs without it. + */ + rtnl_unlock(); ret = ip_ra_control(sk, 0, NULL); - goto out_unlock; + goto out; } break; case MRT_ADD_VIF: @@ -1588,6 +1536,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, } out_unlock: rtnl_unlock(); +out: return ret; } @@ -2982,38 +2931,8 @@ static unsigned int ipmr_seq_read(struct net *net) static int ipmr_dump(struct net *net, struct notifier_block *nb) { - struct mr_table *mrt; - int err; - - err = ipmr_rules_dump(net, nb); - if (err) - return err; - - ipmr_for_each_table(mrt, net) { - struct vif_device *v = &mrt->vif_table[0]; - struct mr_mfc *mfc; - int vifi; - - /* Notifiy on table VIF entries */ - read_lock(&mrt_lock); - for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { - if (!v->dev) - continue; - - call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD, - v, vifi, mrt->id); - } - read_unlock(&mrt_lock); - - /* Notify on table MFC entries */ - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) - call_ipmr_mfc_entry_notifier(nb, net, - FIB_EVENT_ENTRY_ADD, - (struct mfc_cache *)mfc, - mrt->id); - } - - return 0; + return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, + ipmr_mr_table_iter, &mrt_lock); } static const struct fib_notifier_ops ipmr_notifier_ops_template = { @@ -3090,7 +3009,6 @@ static void __net_exit ipmr_net_exit(struct net *net) static struct pernet_operations ipmr_net_ops = { .init = ipmr_net_init, .exit = ipmr_net_exit, - .async = true, }; int __init ip_mr_init(void) diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 8ba55bfda817..4fe97723b53f 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -321,3 +321,45 @@ done: return skb->len; } EXPORT_SYMBOL(mr_rtm_dumproute); + +int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, + int (*rules_dump)(struct net *net, + struct notifier_block *nb), + struct mr_table 
*(*mr_iter)(struct net *net, + struct mr_table *mrt), + rwlock_t *mrt_lock) +{ + struct mr_table *mrt; + int err; + + err = rules_dump(net, nb); + if (err) + return err; + + for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) { + struct vif_device *v = &mrt->vif_table[0]; + struct mr_mfc *mfc; + int vifi; + + /* Notify on table VIF entries */ + read_lock(mrt_lock); + for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { + if (!v->dev) + continue; + + mr_call_vif_notifier(nb, net, family, + FIB_EVENT_VIF_ADD, + v, vifi, mrt->id); + } + read_unlock(mrt_lock); + + /* Notify on table MFC entries */ + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) + mr_call_mfc_notifier(nb, net, family, + FIB_EVENT_ENTRY_ADD, + mfc, mrt->id); + } + + return 0; +} +EXPORT_SYMBOL(mr_dump); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index aaafdbd15ad3..2dc83de53f94 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1628,7 +1628,6 @@ static void __net_exit arp_tables_net_exit(struct net *net) static struct pernet_operations arp_tables_net_ops = { .init = arp_tables_net_init, .exit = arp_tables_net_exit, - .async = true, }; static int __init arp_tables_init(void) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f9063513f9d1..44b308d93ec2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1909,7 +1909,6 @@ static void __net_exit ip_tables_net_exit(struct net *net) static struct pernet_operations ip_tables_net_ops = { .init = ip_tables_net_init, .exit = ip_tables_net_exit, - .async = true, }; static int __init ip_tables_init(void) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 08b3e48f44fc..2c8d313ae216 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -232,7 +232,6 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; refcount_set(&c->refcount, 1); - refcount_set(&c->entries, 1); spin_lock_bh(&cn->lock); if (__clusterip_config_find(net, ip)) { @@ -251,7 +250,7 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, /* create proc dir entry */ sprintf(buffer, "%pI4", &ip); - c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, + c->pde = proc_create_data(buffer, 0600, cn->procdir, &clusterip_proc_fops, c); if (!c->pde) { @@ -263,8 +262,10 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, c->notifier.notifier_call = clusterip_netdev_event; err = register_netdevice_notifier(&c->notifier); - if (!err) + if (!err) { + refcount_set(&c->entries, 1); return c; + } #ifdef CONFIG_PROC_FS proc_remove(c->pde); @@ -273,7 +274,7 @@ err: spin_lock_bh(&cn->lock); list_del_rcu(&c->list); spin_unlock_bh(&cn->lock); - kfree(c); + clusterip_config_put(c); return ERR_PTR(err); } @@ -496,12 +497,15 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) return PTR_ERR(config); } } - cipinfo->config = config; ret = nf_ct_netns_get(par->net, par->family); - if (ret < 0) + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); + clusterip_config_entry_put(par->net, config); + clusterip_config_put(config); + return ret; + } if (!par->net->xt.clusterip_deprecated_warning) { pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
par->net->xt.clusterip_deprecated_warning = true; } + cipinfo->config = config; return ret; } @@ -840,7 +845,6 @@ static struct pernet_operations clusterip_net_ops = { .exit = clusterip_net_exit, .id = &clusterip_net_id, .size = sizeof(struct clusterip_net), - .async = true, }; static int __init clusterip_tg_init(void) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c1c136a93911..9ac92ea7b93c 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -87,7 +87,6 @@ static void __net_exit iptable_filter_net_exit(struct net *net) static struct pernet_operations iptable_filter_net_ops = { .init = iptable_filter_net_init, .exit = iptable_filter_net_exit, - .async = true, }; static int __init iptable_filter_init(void) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 57244b62a4fc..a0d3ad60a411 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -118,7 +118,6 @@ static void __net_exit defrag4_net_exit(struct net *net) static struct pernet_operations defrag4_net_ops = { .exit = defrag4_net_exit, - .async = true, }; static int __init nf_defrag_init(void) diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index 25d2975da156..0cd46bffa469 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -111,6 +111,7 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, default: return -1; } + csum_replace4(&iph->check, addr, new_addr); return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); } @@ -185,7 +186,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) return false; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index 162293469ac2..df5c2a2061a4 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -122,7 +122,6 @@ static void __net_exit nf_log_arp_net_exit(struct net *net) static struct pernet_operations nf_log_arp_net_ops = { .init = nf_log_arp_net_init, .exit = nf_log_arp_net_exit, - .async = true, }; static int __init nf_log_arp_init(void) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 7a06de140f3c..4388de0e5380 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -358,7 +358,6 @@ static void __net_exit nf_log_ipv4_net_exit(struct net *net) static struct pernet_operations nf_log_ipv4_net_ops = { .init = nf_log_ipv4_net_init, .exit = nf_log_ipv4_net_exit, - .async = true, }; static int __init nf_log_ipv4_init(void) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 0164def9c808..05e47d777009 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1177,7 +1177,7 @@ static struct ping_seq_afinfo ping_v4_seq_afinfo = { int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) { struct proc_dir_entry *p; - p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, + p = proc_create_data(afinfo->name, 0444, net->proc_net, afinfo->seq_fops, afinfo); if (!p) return -ENOMEM; @@ -1204,7 +1204,6 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net) static struct pernet_operations ping_v4_net_ops = { .init = ping_v4_proc_init_net, .exit 
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 162293469ac2..df5c2a2061a4 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -122,7 +122,6 @@ static void __net_exit nf_log_arp_net_exit(struct net *net)
 static struct pernet_operations nf_log_arp_net_ops = {
 	.init = nf_log_arp_net_init,
 	.exit = nf_log_arp_net_exit,
-	.async = true,
 };
 
 static int __init nf_log_arp_init(void)
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 7a06de140f3c..4388de0e5380 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -358,7 +358,6 @@ static void __net_exit nf_log_ipv4_net_exit(struct net *net)
 static struct pernet_operations nf_log_ipv4_net_ops = {
 	.init = nf_log_ipv4_net_init,
 	.exit = nf_log_ipv4_net_exit,
-	.async = true,
 };
 
 static int __init nf_log_ipv4_init(void)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 0164def9c808..05e47d777009 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1177,7 +1177,7 @@ static struct ping_seq_afinfo ping_v4_seq_afinfo = {
 int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
 {
 	struct proc_dir_entry *p;
-	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+	p = proc_create_data(afinfo->name, 0444, net->proc_net,
 			     afinfo->seq_fops, afinfo);
 	if (!p)
 		return -ENOMEM;
@@ -1204,7 +1204,6 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v4_net_ops = {
 	.init = ping_v4_proc_init_net,
 	.exit = ping_v4_proc_exit_net,
-	.async = true,
 };
 
 int __init ping_proc_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d97e83b2dd33..adfb75340275 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -521,12 +521,12 @@ static const struct file_operations netstat_seq_fops = {
 
 static __net_init int ip_proc_init_net(struct net *net)
 {
-	if (!proc_create("sockstat", S_IRUGO, net->proc_net,
+	if (!proc_create("sockstat", 0444, net->proc_net,
 			 &sockstat_seq_fops))
 		goto out_sockstat;
-	if (!proc_create("netstat", S_IRUGO, net->proc_net, &netstat_seq_fops))
+	if (!proc_create("netstat", 0444, net->proc_net, &netstat_seq_fops))
 		goto out_netstat;
-	if (!proc_create("snmp", S_IRUGO, net->proc_net, &snmp_seq_fops))
+	if (!proc_create("snmp", 0444, net->proc_net, &snmp_seq_fops))
 		goto out_snmp;
 
 	return 0;
@@ -549,7 +549,6 @@ static __net_exit void ip_proc_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ip_proc_ops = {
 	.init = ip_proc_init_net,
 	.exit = ip_proc_exit_net,
-	.async = true,
 };
 
 int __init ip_misc_proc_init(void)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 54648d20bf0f..1b4d3355624a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout)
 	/*
 	 * Raw sockets may have direct kernel references. Kill them.
 	 */
-	rtnl_lock();
 	ip_ra_control(sk, 0, NULL);
-	rtnl_unlock();
 
 	sk_common_release(sk);
 }
@@ -1142,7 +1140,7 @@ static const struct file_operations raw_seq_fops = {
 
 static __net_init int raw_init_net(struct net *net)
 {
-	if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops))
+	if (!proc_create("raw", 0444, net->proc_net, &raw_seq_fops))
 		return -ENOMEM;
 
 	return 0;
@@ -1156,7 +1154,6 @@ static __net_exit void raw_exit_net(struct net *net)
 static __net_initdata struct pernet_operations raw_net_ops = {
 	.init = raw_init_net,
 	.exit = raw_exit_net,
-	.async = true,
 };
 
 int __init raw_proc_init(void)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6a7b3cba3972..8322e479f299 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -128,10 +128,11 @@ static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
 static int ip_rt_error_cost __read_mostly	= HZ;
 static int ip_rt_error_burst __read_mostly	= 5 * HZ;
 static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
-static int ip_rt_min_pmtu __read_mostly	= 512 + 20 + 20;
+static u32 ip_rt_min_pmtu __read_mostly	= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 
 static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
+
 /*
  *	Interface to generic destination cache.
  */
@@ -378,12 +379,12 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
 {
 	struct proc_dir_entry *pde;
 
-	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
+	pde = proc_create("rt_cache", 0444, net->proc_net,
 			  &rt_cache_seq_fops);
 	if (!pde)
 		goto err1;
 
-	pde = proc_create("rt_cache", S_IRUGO,
+	pde = proc_create("rt_cache", 0444,
 			  net->proc_net_stat, &rt_cpu_seq_fops);
 	if (!pde)
 		goto err2;
@@ -417,7 +418,6 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 static struct pernet_operations ip_rt_proc_ops __net_initdata = {
 	.init = ip_rt_do_proc_init,
 	.exit = ip_rt_do_proc_exit,
-	.async = true,
 };
 
 static int __init ip_rt_proc_init(void)
@@ -634,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
 static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
 {
 	rt->rt_pmtu = fnhe->fnhe_pmtu;
+	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
 	rt->dst.expires = fnhe->fnhe_expires;
 
 	if (fnhe->fnhe_gw) {
@@ -644,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
 }
 
 static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
-				  u32 pmtu, unsigned long expires)
+				  u32 pmtu, bool lock, unsigned long expires)
 {
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
@@ -681,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 		fnhe->fnhe_genid = genid;
 		if (gw)
 			fnhe->fnhe_gw = gw;
-		if (pmtu)
+		if (pmtu) {
 			fnhe->fnhe_pmtu = pmtu;
+			fnhe->fnhe_mtu_locked = lock;
+		}
 		fnhe->fnhe_expires = max(1UL, expires);
 		/* Update all cached dsts too */
 		rt = rcu_dereference(fnhe->fnhe_rth_input);
@@ -706,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 		fnhe->fnhe_daddr = daddr;
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
+		fnhe->fnhe_mtu_locked = lock;
 		fnhe->fnhe_expires = expires;
 
 		/* Exception created; mark the cached routes for the nexthop
@@ -787,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
 			struct fib_nh *nh = &FIB_RES_NH(res);
 
 			update_or_create_fnhe(nh, fl4->daddr, new_gw,
-					      0, jiffies + ip_rt_gc_timeout);
+					      0, false,
+					      jiffies + ip_rt_gc_timeout);
 		}
 		if (kill_route)
 			rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -931,14 +936,23 @@ out_put_peer:
 
 static int ip_error(struct sk_buff *skb)
 {
-	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
 	struct rtable *rt = skb_rtable(skb);
+	struct net_device *dev = skb->dev;
+	struct in_device *in_dev;
 	struct inet_peer *peer;
 	unsigned long now;
 	struct net *net;
 	bool send;
 	int code;
 
+	if (netif_is_l3_master(skb->dev)) {
+		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+		if (!dev)
+			goto out;
+	}
+
+	in_dev = __in_dev_get_rcu(dev);
+
 	/* IP on this device is disabled. */
 	if (!in_dev)
 		goto out;
@@ -1000,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 {
 	struct dst_entry *dst = &rt->dst;
 	struct fib_result res;
+	bool lock = false;
 
-	if (dst_metric_locked(dst, RTAX_MTU))
+	if (ip_mtu_locked(dst))
 		return;
 
 	if (ipv4_mtu(dst) < mtu)
 		return;
 
-	if (mtu < ip_rt_min_pmtu)
+	if (mtu < ip_rt_min_pmtu) {
+		lock = true;
 		mtu = ip_rt_min_pmtu;
+	}
 
 	if (rt->rt_pmtu == mtu &&
 	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -1018,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
 		struct fib_nh *nh = &FIB_RES_NH(res);
 
-		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
+		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
 				      jiffies + ip_rt_mtu_expires);
 	}
 	rcu_read_unlock();
@@ -1271,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 
 	mtu = READ_ONCE(dst->dev->mtu);
 
-	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+	if (unlikely(ip_mtu_locked(dst))) {
 		if (rt->rt_uses_gateway && mtu > 576)
 			mtu = 576;
 	}
@@ -1384,7 +1401,7 @@ struct uncached_list {
 
 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
 
-static void rt_add_uncached_list(struct rtable *rt)
+void rt_add_uncached_list(struct rtable *rt)
 {
 	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
 
@@ -1395,14 +1412,8 @@ static void rt_add_uncached_list(struct rtable *rt)
 	spin_unlock_bh(&ul->lock);
 }
 
-static void ipv4_dst_destroy(struct dst_entry *dst)
+void rt_del_uncached_list(struct rtable *rt)
 {
-	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
-	struct rtable *rt = (struct rtable *) dst;
-
-	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
-		kfree(p);
-
 	if (!list_empty(&rt->rt_uncached)) {
 		struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1412,6 +1423,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 	}
 }
 
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
+	struct rtable *rt = (struct rtable *)dst;
+
+	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
+		kfree(p);
+
+	rt_del_uncached_list(rt);
+}
+
 void rt_flush_dev(struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
@@ -1507,6 +1529,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_is_input = 0;
 		rt->rt_iif = 0;
 		rt->rt_pmtu = 0;
+		rt->rt_mtu_locked = 0;
 		rt->rt_gateway = 0;
 		rt->rt_uses_gateway = 0;
 		INIT_LIST_HEAD(&rt->rt_uncached);
@@ -2523,6 +2546,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 		rt->rt_is_input = ort->rt_is_input;
 		rt->rt_iif = ort->rt_iif;
 		rt->rt_pmtu = ort->rt_pmtu;
+		rt->rt_mtu_locked = ort->rt_mtu_locked;
 
 		rt->rt_genid = rt_genid_ipv4(net);
 		rt->rt_flags = ort->rt_flags;
@@ -2625,6 +2649,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
 	if (rt->rt_pmtu && expires)
 		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
+	if (rt->rt_mtu_locked && expires)
+		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
 	if (rtnetlink_put_metrics(skb, metrics) < 0)
 		goto nla_put_failure;
 
@@ -2810,6 +2836,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
 static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
 static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
 static int ip_rt_gc_elasticity __read_mostly	= 8;
+static int ip_min_valid_pmtu __read_mostly	= IPV4_MIN_MTU;
 
 static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
 				     void __user *buffer,
@@ -2925,7 +2952,8 @@ static struct ctl_table ipv4_route_table[] = {
 		.data		= &ip_rt_min_pmtu,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &ip_min_valid_pmtu,
 	},
 	{
 		.procname	= "min_adv_mss",
@@ -2988,7 +3016,6 @@ static __net_exit void sysctl_route_net_exit(struct net *net)
 static __net_initdata struct pernet_operations sysctl_route_ops = {
 	.init = sysctl_route_net_init,
 	.exit = sysctl_route_net_exit,
-	.async = true,
 };
#endif
 
@@ -3002,7 +3029,6 @@ static __net_init int rt_genid_init(struct net *net)
 
 static __net_initdata struct pernet_operations rt_genid_ops = {
 	.init = rt_genid_init,
-	.async = true,
 };
 
 static int __net_init ipv4_inetpeer_init(struct net *net)
@@ -3028,7 +3054,6 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net)
 static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
 	.init = ipv4_inetpeer_init,
 	.exit = ipv4_inetpeer_exit,
-	.async = true,
 };
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
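The __ip_rt_update_pmtu() change above clamps an ICMP-advertised MTU to ip_rt_min_pmtu and, when it has to clamp, locks the PMTU on the exception so the stack stops honoring even smaller advertisements. A standalone sketch of just that clamp-and-lock decision (the constant mirrors route.c; everything else is illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #define IP_RT_MIN_PMTU (512 + 20 + 20)	/* same floor route.c uses */

    /* Mirrors the decision added to __ip_rt_update_pmtu(): clamp an
     * advertised MTU to the floor and report whether it must be locked. */
    static unsigned int clamp_pmtu(unsigned int mtu, bool *lock)
    {
    	*lock = false;
    	if (mtu < IP_RT_MIN_PMTU) {
    		*lock = true;	/* peer advertised less than we accept */
    		mtu = IP_RT_MIN_PMTU;
    	}
    	return mtu;
    }

    int main(void)
    {
    	bool lock;
    	unsigned int mtu = clamp_pmtu(68, &lock); /* e.g. a hostile ICMP */

    	printf("pmtu=%u locked=%d\n", mtu, lock);
    	return 0;
    }

The lock bit then rides along in fnhe_mtu_locked / rt_mtu_locked and is exported to userspace via the RTAX_LOCK metric, as the rt_fill_info() hunk shows.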
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 011de9a20ec6..4b195bac8ac0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
-	{
-		.procname	= "udp_rmem_min",
-		.data		= &sysctl_udp_rmem_min,
-		.maxlen		= sizeof(sysctl_udp_rmem_min),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one
-	},
-	{
-		.procname	= "udp_wmem_min",
-		.data		= &sysctl_udp_wmem_min,
-		.maxlen		= sizeof(sysctl_udp_wmem_min),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one
-	},
 	{ }
 };
 
@@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &one,
 	},
+	{
+		.procname	= "udp_rmem_min",
+		.data		= &init_net.ipv4.sysctl_udp_rmem_min,
+		.maxlen		= sizeof(init_net.ipv4.sysctl_udp_rmem_min),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one
+	},
+	{
+		.procname	= "udp_wmem_min",
+		.data		= &init_net.ipv4.sysctl_udp_wmem_min,
+		.maxlen		= sizeof(init_net.ipv4.sysctl_udp_wmem_min),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one
+	},
 	{ }
 };
 
@@ -1219,7 +1219,6 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
 	.init = ipv4_sysctl_init_net,
 	.exit = ipv4_sysctl_exit_net,
-	.async = true,
 };
 
 static __init int sysctl_ipv4_init(void)
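Moving udp_rmem_min/udp_wmem_min from the global ipv4_table into ipv4_net_table makes them per network namespace. A small userspace check of the effect — assumes root (CAP_SYS_ADMIN) and a kernel carrying this change; /proc/sys/net resolves against the reading task's netns, so no remount is needed:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Print udp_rmem_min, unshare the netns, print it again: after this
     * patch the two values are independent per namespace. */
    static void show(const char *when)
    {
    	char buf[64];
    	FILE *f = fopen("/proc/sys/net/ipv4/udp_rmem_min", "r");

    	if (!f || !fgets(buf, sizeof(buf), f)) {
    		perror("udp_rmem_min");
    		exit(1);
    	}
    	printf("%s: %s", when, buf);
    	fclose(f);
    }

    int main(void)
    {
    	show("init netns");
    	if (unshare(CLONE_NEWNET)) {	/* requires CAP_SYS_ADMIN */
    		perror("unshare");
    		return 1;
    	}
    	show("new netns ");	/* per-netns default from udp_sysctl_init() */
    	return 0;
    }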
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fb350f740f69..0c31be306572 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -994,7 +994,9 @@ new_segment:
 		get_page(page);
 		skb_fill_page_desc(skb, i, page, offset, copy);
 	}
-	skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+	if (!(flags & MSG_NO_SHARED_FRAGS))
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 
 	skb->len += copy;
 	skb->data_len += copy;
@@ -3031,7 +3033,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	u32 rate;
 
 	stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
-			  4 * nla_total_size(sizeof(u32)) +
+			  5 * nla_total_size(sizeof(u32)) +
 			  3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
 	if (!stats)
 		return NULL;
@@ -3061,6 +3063,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	nla_put_u8(stats, TCP_NLA_RECUR_RETRANS,
 		   inet_csk(sk)->icsk_retransmits);
 	nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+	nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
 
 	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
 	nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
@@ -3542,6 +3545,7 @@ int tcp_abort(struct sock *sk, int err)
 	bh_unlock_sock(sk);
 	local_bh_enable();
+	tcp_write_queue_purge(sk);
 	release_sock(sk);
 	return 0;
 }
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index c92014cb1e16..158d105e76da 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -731,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
 		bbr->mode = BBR_DRAIN;	/* drain queue we created */
 		bbr->pacing_gain = bbr_drain_gain;	/* pace slow to drain */
 		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
+		tcp_sk(sk)->snd_ssthresh =
+				bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
 	}	/* fall through to check if in-flight is already small: */
 	if (bbr->mode == BBR_DRAIN &&
 	    tcp_packets_in_flight(tcp_sk(sk)) <=
@@ -834,6 +836,7 @@ static void bbr_init(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->prior_cwnd = 0;
+	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	bbr->rtt_cnt = 0;
 	bbr->next_rtt_delivered = 0;
 	bbr->prev_ca_state = TCP_CA_Open;
@@ -886,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
 static u32 bbr_ssthresh(struct sock *sk)
 {
 	bbr_save_cwnd(sk);
-	return TCP_INFINITE_SSTHRESH;	 /* BBR does not use ssthresh */
+	return tcp_sk(sk)->snd_ssthresh;
 }
 
 static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
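With this change BBR exports a meaningful snd_ssthresh (the estimated BDP at drain time) instead of the "infinite" sentinel, so TCP_INFO/TCP_NLA_SND_SSTHRESH consumers see a real value. A rough, self-contained sketch of the BDP-style target computation this is based on — the arithmetic shape of bbr_target_cwnd(), not its exact fixed-point code:

    #include <stdint.h>
    #include <stdio.h>

    #define BBR_UNIT 256	/* BBR's fixed-point scale for gains */

    /* Rough shape of bbr_target_cwnd(): cwnd ~= gain * bandwidth-delay
     * product, in packets. bw is packets/sec, rtt is microseconds. */
    static uint32_t target_cwnd(uint64_t bw_pkts_per_sec, uint32_t min_rtt_us,
    			    uint32_t gain)
    {
    	uint64_t bdp = bw_pkts_per_sec * min_rtt_us / 1000000;

    	return (uint32_t)((bdp * gain) / BBR_UNIT) + 2; /* + slack for quantization */
    }

    int main(void)
    {
    	/* 10k pkts/s at 20 ms min RTT -> BDP of about 200 packets. */
    	printf("ssthresh-like target: %u pkts\n",
    	       target_cwnd(10000, 20000, BBR_UNIT));
    	return 0;
    }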
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 7c843578f233..faddf4f9a707 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -6,7 +6,7 @@
 * The algorithm is described in:
 * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
 *  for High-Speed Networks"
- * http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf
+ * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf
 *
 * Implemented from description in paper and ns-2 simulation.
 * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 06b9c4765f42..451ef3012636 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1968,11 +1968,6 @@ void tcp_enter_loss(struct sock *sk)
 	/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
 	 * loss recovery is underway except recurring timeout(s) on
 	 * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
-	 *
-	 * In theory F-RTO can be used repeatedly during loss recovery.
-	 * In practice this interacts badly with broken middle-boxes that
-	 * falsely raise the receive window, which results in repeated
-	 * timeouts and stop-and-go behavior.
 	 */
 	tp->frto = net->ipv4.sysctl_tcp_frto &&
 		   (new_recovery || icsk->icsk_retransmits) &&
@@ -2628,18 +2623,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 	    tcp_try_undo_loss(sk, false))
 		return;
 
-	/* The ACK (s)acks some never-retransmitted data meaning not all
-	 * the data packets before the timeout were lost. Therefore we
-	 * undo the congestion window and state. This is essentially
-	 * the operation in F-RTO (RFC5682 section 3.1 step 3.b). Since
-	 * a retransmitted skb is permantly marked, we can apply such an
-	 * operation even if F-RTO was not used.
-	 */
-	if ((flag & FLAG_ORIG_SACK_ACKED) &&
-	    tcp_try_undo_loss(sk, tp->undo_marker))
-		return;
-
 	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+		/* Step 3.b. A timeout is spurious if not all data are
+		 * lost, i.e., never-retransmitted data are (s)acked.
+		 */
+		if ((flag & FLAG_ORIG_SACK_ACKED) &&
+		    tcp_try_undo_loss(sk, true))
+			return;
+
 		if (after(tp->snd_nxt, tp->high_seq)) {
 			if (flag & FLAG_DATA_SACKED || is_dupack)
 				tp->frto = 0; /* Step 3.a. loss was real */
@@ -3998,6 +3989,7 @@ void tcp_reset(struct sock *sk)
 	/* This barrier is coupled with smp_rmb() in tcp_poll() */
 	smp_wmb();
 
+	tcp_write_queue_purge(sk);
 	tcp_done(sk);
 
 	if (!sock_flag(sk, SOCK_DEAD))
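The tcp_process_loss() rework above confines the "spurious timeout" undo to connections that actually armed F-RTO for this recovery, where previously it could fire unconditionally. A compact boolean model of the new rule — a sketch of the decision only, not kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of the reworked F-RTO step 3.b in tcp_process_loss(): undo
     * the loss state only when F-RTO was armed for this recovery AND the
     * ACK (s)acked never-retransmitted data (timeout was spurious). */
    static bool should_undo_loss(bool frto_armed, bool orig_sack_acked)
    {
    	return frto_armed && orig_sack_acked;
    }

    int main(void)
    {
    	/* Before the patch the undo could fire with frto_armed false. */
    	printf("frto+sack -> %d\n", should_undo_loss(true, true));   /* 1 */
    	printf("no frto   -> %d\n", should_undo_loss(false, true));  /* 0 */
    	return 0;
    }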
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2c6aec2643e8..9639334ebb7c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2215,7 +2215,7 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
 	afinfo->seq_ops.next		= tcp_seq_next;
 	afinfo->seq_ops.stop		= tcp_seq_stop;
 
-	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+	p = proc_create_data(afinfo->name, 0444, net->proc_net,
 			     afinfo->seq_fops, afinfo);
 	if (!p)
 		rc = -ENOMEM;
@@ -2391,7 +2391,6 @@ static void __net_exit tcp4_proc_exit_net(struct net *net)
 static struct pernet_operations tcp4_net_ops = {
 	.init = tcp4_proc_init_net,
 	.exit = tcp4_proc_exit_net,
-	.async = true,
 };
 
 int __init tcp4_proc_init(void)
@@ -2578,7 +2577,6 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
 	.init	    = tcp_sk_init,
 	.exit	    = tcp_sk_exit,
 	.exit_batch = tcp_sk_exit_batch,
-	.async	    = true,
 };
 
 void __init tcp_v4_init(void)
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index aa6fea9f3328..03b51cdcc731 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1024,7 +1024,6 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
 static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
 	.init		= tcp_net_metrics_init,
 	.exit_batch	= tcp_net_metrics_exit_batch,
-	.async		= true,
 };
 
 void __init tcp_metrics_init(void)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 71fc60f1b326..f7d944855f8e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -34,6 +34,7 @@ static void tcp_write_err(struct sock *sk)
 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
 	sk->sk_error_report(sk);
 
+	tcp_write_queue_purge(sk);
 	tcp_done(sk);
 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
 }
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3013404d0935..f49e14cd3891 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table);
 long sysctl_udp_mem[3] __read_mostly;
 EXPORT_SYMBOL(sysctl_udp_mem);
 
-int sysctl_udp_rmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_rmem_min);
-
-int sysctl_udp_wmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_wmem_min);
-
 atomic_long_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
@@ -2533,35 +2527,35 @@ int udp_abort(struct sock *sk, int err)
 EXPORT_SYMBOL_GPL(udp_abort);
 
 struct proto udp_prot = {
-	.name		  = "UDP",
-	.owner		  = THIS_MODULE,
-	.close		  = udp_lib_close,
-	.connect	  = ip4_datagram_connect,
-	.disconnect	  = udp_disconnect,
-	.ioctl		  = udp_ioctl,
-	.init		  = udp_init_sock,
-	.destroy	  = udp_destroy_sock,
-	.setsockopt	  = udp_setsockopt,
-	.getsockopt	  = udp_getsockopt,
-	.sendmsg	  = udp_sendmsg,
-	.recvmsg	  = udp_recvmsg,
-	.sendpage	  = udp_sendpage,
-	.release_cb	  = ip4_datagram_release_cb,
-	.hash		  = udp_lib_hash,
-	.unhash		  = udp_lib_unhash,
-	.rehash		  = udp_v4_rehash,
-	.get_port	  = udp_v4_get_port,
-	.memory_allocated = &udp_memory_allocated,
-	.sysctl_mem	  = sysctl_udp_mem,
-	.sysctl_wmem	  = &sysctl_udp_wmem_min,
-	.sysctl_rmem	  = &sysctl_udp_rmem_min,
-	.obj_size	  = sizeof(struct udp_sock),
-	.h.udp_table	  = &udp_table,
+	.name			= "UDP",
+	.owner			= THIS_MODULE,
+	.close			= udp_lib_close,
+	.connect		= ip4_datagram_connect,
+	.disconnect		= udp_disconnect,
+	.ioctl			= udp_ioctl,
+	.init			= udp_init_sock,
+	.destroy		= udp_destroy_sock,
+	.setsockopt		= udp_setsockopt,
+	.getsockopt		= udp_getsockopt,
+	.sendmsg		= udp_sendmsg,
+	.recvmsg		= udp_recvmsg,
+	.sendpage		= udp_sendpage,
+	.release_cb		= ip4_datagram_release_cb,
+	.hash			= udp_lib_hash,
+	.unhash			= udp_lib_unhash,
+	.rehash			= udp_v4_rehash,
+	.get_port		= udp_v4_get_port,
+	.memory_allocated	= &udp_memory_allocated,
+	.sysctl_mem		= sysctl_udp_mem,
+	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+	.obj_size		= sizeof(struct udp_sock),
+	.h.udp_table		= &udp_table,
 #ifdef CONFIG_COMPAT
-	.compat_setsockopt = compat_udp_setsockopt,
-	.compat_getsockopt = compat_udp_getsockopt,
+	.compat_setsockopt	= compat_udp_setsockopt,
+	.compat_getsockopt	= compat_udp_getsockopt,
 #endif
-	.diag_destroy	  = udp_abort,
+	.diag_destroy		= udp_abort,
 };
 EXPORT_SYMBOL(udp_prot);
 
@@ -2679,7 +2673,7 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
 	afinfo->seq_ops.next		= udp_seq_next;
 	afinfo->seq_ops.stop		= udp_seq_stop;
 
-	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+	p = proc_create_data(afinfo->name, 0444, net->proc_net,
 			     afinfo->seq_fops, afinfo);
 	if (!p)
 		rc = -ENOMEM;
@@ -2762,7 +2756,6 @@ static void __net_exit udp4_proc_exit_net(struct net *net)
 static struct pernet_operations udp4_net_ops = {
 	.init = udp4_proc_init_net,
 	.exit = udp4_proc_exit_net,
-	.async = true,
 };
 
 int __init udp4_proc_init(void)
@@ -2831,6 +2824,26 @@ u32 udp_flow_hashrnd(void)
 }
 EXPORT_SYMBOL(udp_flow_hashrnd);
 
+static void __udp_sysctl_init(struct net *net)
+{
+	net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+	net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+	net->ipv4.sysctl_udp_l3mdev_accept = 0;
+#endif
+}
+
+static int __net_init udp_sysctl_init(struct net *net)
+{
+	__udp_sysctl_init(net);
+	return 0;
+}
+
+static struct pernet_operations __net_initdata udp_sysctl_ops = {
+	.init	= udp_sysctl_init,
+};
+
 void __init udp_init(void)
 {
 	unsigned long limit;
@@ -2843,8 +2856,7 @@ void __init udp_init(void)
 	sysctl_udp_mem[1] = limit;
 	sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
 
-	sysctl_udp_rmem_min = SK_MEM_QUANTUM;
-	sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+	__udp_sysctl_init(&init_net);
 
 	/* 16 spinlocks per cpu */
 	udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
@@ -2854,4 +2866,7 @@ void __init udp_init(void)
 		panic("UDP: failed to alloc udp_busylocks\n");
 	for (i = 0; i < (1U << udp_busylocks_log); i++)
 		spin_lock_init(udp_busylocks + i);
+
+	if (register_pernet_subsys(&udp_sysctl_ops))
+		panic("UDP: failed to init sysctl parameters.\n");
 }
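The udp_sysctl_ops block above is the canonical pernet_operations shape: an .init hook seeds per-namespace defaults, and register_pernet_subsys() runs it for the init netns at boot and for every namespace created afterwards. A hedged kernel-style sketch of the same pattern with per-net private state — a hypothetical example module, not part of the patch:

    // SPDX-License-Identifier: GPL-2.0
    /* Minimal sketch of the pernet_operations pattern udp_sysctl_ops
     * follows. Hypothetical demo module; names are made up. */
    #include <linux/module.h>
    #include <net/net_namespace.h>
    #include <net/netns/generic.h>

    static unsigned int demo_net_id __read_mostly;

    struct demo_net {
    	int counter;	/* one instance exists per struct net */
    };

    static int __net_init demo_net_init(struct net *net)
    {
    	struct demo_net *dn = net_generic(net, demo_net_id);

    	dn->counter = 0;	/* per-namespace default, like udp_rmem_min */
    	return 0;
    }

    static struct pernet_operations demo_net_ops = {
    	.init = demo_net_init,
    	.id   = &demo_net_id,
    	.size = sizeof(struct demo_net),	/* allocated per netns */
    };

    static int __init demo_init(void)
    {
    	return register_pernet_subsys(&demo_net_ops);
    }

    static void __exit demo_exit(void)
    {
    	unregister_pernet_subsys(&demo_net_ops);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");

The many `-	.async = true,` removals throughout this series are the flip side of the same API: that experimental field was dropped from struct pernet_operations, so every user loses the line.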
0; +} + +static struct pernet_operations __net_initdata udp_sysctl_ops = { + .init = udp_sysctl_init, +}; + void __init udp_init(void) { unsigned long limit; @@ -2843,8 +2856,7 @@ void __init udp_init(void) sysctl_udp_mem[1] = limit; sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; - sysctl_udp_rmem_min = SK_MEM_QUANTUM; - sysctl_udp_wmem_min = SK_MEM_QUANTUM; + __udp_sysctl_init(&init_net); /* 16 spinlocks per cpu */ udp_busylocks_log = ilog2(nr_cpu_ids) + 4; @@ -2854,4 +2866,7 @@ void __init udp_init(void) panic("UDP: failed to alloc udp_busylocks\n"); for (i = 0; i < (1U << udp_busylocks_log); i++) spin_lock_init(udp_busylocks + i); + + if (register_pernet_subsys(&udp_sysctl_ops)) + panic("UDP: failed to init sysctl parameters.\n"); } diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 72f2c3806408..f96614e9b9a5 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -104,7 +104,6 @@ static void __net_exit udplite4_proc_exit_net(struct net *net) static struct pernet_operations udplite4_net_ops = { .init = udplite4_proc_init_net, .exit = udplite4_proc_exit_net, - .async = true, }; static __init int udplite4_proc_init(void) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 63faeee989a9..2a9764bd1719 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -92,7 +92,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); - eth_hdr(skb)->h_proto = skb->protocol; + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; err = 0; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 94b8702603bc..be980c195fc5 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -30,7 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) mtu = dst_mtu(skb_dst(skb)); if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) { + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { skb->protocol = htons(ETH_P_IP); if (skb->sk) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0c752dc3f93b..d73a6d6652f6 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,7 +100,9 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); + rt_add_uncached_list(&xdst->u.rt); return 0; } @@ -240,7 +242,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); - + if (xdst->u.rt.rt_uncached_list) + rt_del_uncached_list(&xdst->u.rt); xfrm_dst_destroy(xdst); } @@ -364,7 +367,6 @@ static void __net_exit xfrm4_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm4_net_ops = { .init = xfrm4_net_init, .exit = xfrm4_net_exit, - .async = true, }; static void __init xfrm4_policy_init(void) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b5fd116c046a..78cef00c9596 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -94,15 +94,6 @@ #include <linux/seq_file.h> #include <linux/export.h> -/* Set to 3 to get tracing... */ -#define ACONF_DEBUG 2 - -#if ACONF_DEBUG >= 3 -#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__) -#else -#define ADBG(fmt, ...) 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b5fd116c046a..78cef00c9596 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -94,15 +94,6 @@
 #include <linux/seq_file.h>
 #include <linux/export.h>
 
-/* Set to 3 to get tracing... */
-#define ACONF_DEBUG 2
-
-#if ACONF_DEBUG >= 3
-#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
-#else
-#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
 #define	INFINITY_LIFE_TIME	0xFFFFFFFF
 
 #define IPV6_MAX_STRLEN \
@@ -409,9 +400,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	dev_hold(dev);
 
 	if (snmp6_alloc_dev(ndev) < 0) {
-		ADBG(KERN_WARNING
-			"%s: cannot allocate memory for statistics; dev=%s.\n",
-			__func__, dev->name);
+		netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
+			   __func__);
 		neigh_parms_release(&nd_tbl, ndev->nd_parms);
 		dev_put(dev);
 		kfree(ndev);
@@ -419,9 +409,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	}
 
 	if (snmp6_register_dev(ndev) < 0) {
-		ADBG(KERN_WARNING
-			"%s: cannot create /proc/net/dev_snmp6/%s\n",
-			__func__, dev->name);
+		netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
+			   __func__, dev->name);
 		goto err_release;
 	}
 
@@ -984,7 +973,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
 
 	/* Ignore adding duplicate addresses on an interface */
 	if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
-		ADBG("ipv6_add_addr: already assigned\n");
+		netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
 		err = -EEXIST;
 	} else {
 		hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
@@ -1044,7 +1033,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	ifa = kzalloc(sizeof(*ifa), gfp_flags);
 	if (!ifa) {
-		ADBG("ipv6_add_addr: malloc failed\n");
 		err = -ENOBUFS;
 		goto out;
 	}
@@ -1851,22 +1839,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 		  const struct net_device *dev, int strict)
 {
-	return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+	return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+				       strict, IFA_F_TENTATIVE);
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
 
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed in dev argument. So the 2 cases for addresses checks are:
+ *   1. does the address exist in the L3 domain that dev is part of
+ *      (skip_dev_check = true), or
+ *
+ *   2. does the address exist on the specific device
+ *      (skip_dev_check = false)
+ */
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-			    const struct net_device *dev, int strict,
-			    u32 banned_flags)
+			    const struct net_device *dev, bool skip_dev_check,
+			    int strict, u32 banned_flags)
 {
 	unsigned int hash = inet6_addr_hash(net, addr);
+	const struct net_device *l3mdev;
 	struct inet6_ifaddr *ifp;
 	u32 ifp_flags;
 
 	rcu_read_lock();
+
+	l3mdev = l3mdev_master_dev_rcu(dev);
+	if (skip_dev_check)
+		dev = NULL;
+
 	hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
+
+		if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+			continue;
+
 		/* Decouple optimistic from tentative for evaluation here.
 		 * Ban optimistic addresses explicitly, when required.
 		 */
@@ -2598,7 +2606,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 
 	pinfo = (struct prefix_info *) opt;
 
 	if (len < sizeof(struct prefix_info)) {
-		ADBG("addrconf: prefix option too short\n");
+		netdev_dbg(dev, "addrconf: prefix option too short\n");
 		return;
 	}
@@ -4261,7 +4269,7 @@ static const struct file_operations if6_fops = {
 
 static int __net_init if6_proc_net_init(struct net *net)
 {
-	if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
+	if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops))
 		return -ENOMEM;
 	return 0;
 }
@@ -4274,7 +4282,6 @@ static void __net_exit if6_proc_net_exit(struct net *net)
 static struct pernet_operations if6_proc_net_ops = {
 	.init = if6_proc_net_init,
 	.exit = if6_proc_net_exit,
-	.async = true,
 };
 
 int __init if6_proc_init(void)
@@ -4426,8 +4433,8 @@ restart:
 	if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
 		next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
 
-	ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
-	      now, next, next_sec, next_sched);
+	pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+		 now, next, next_sec, next_sched);
 	mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
 	rcu_read_unlock_bh();
 }
@@ -6584,7 +6591,6 @@ static void __net_exit addrconf_exit_net(struct net *net)
 static struct pernet_operations addrconf_ops = {
 	.init = addrconf_init_net,
 	.exit = addrconf_exit_net,
-	.async = true,
 };
 
 static struct rtnl_af_ops inet6_ops __read_mostly = {
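The new skip_dev_check parameter splits "address exists somewhere in dev's L3 (VRF) domain" from "address exists on this exact device". A tiny predicate sketch of the two modes — a plain C model with made-up types, not the kernel function:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    struct addr_entry {
    	const char *dev;	/* device the address lives on */
    	const char *l3dom;	/* VRF/L3 master domain of that device */
    };

    /* Model of the ipv6_chk_addr_and_flags() matching rule after the
     * patch: always require the same L3 domain; require the same device
     * only when skip_dev_check is false. */
    static bool addr_matches(const struct addr_entry *e, const char *dev,
    			 const char *l3dom, bool skip_dev_check)
    {
    	if (strcmp(e->l3dom, l3dom) != 0)
    		return false;
    	return skip_dev_check || strcmp(e->dev, dev) == 0;
    }

    int main(void)
    {
    	struct addr_entry e = { .dev = "eth0", .l3dom = "vrf-blue" };

    	printf("%d\n", addr_matches(&e, "eth1", "vrf-blue", true));  /* 1: same domain */
    	printf("%d\n", addr_matches(&e, "eth1", "vrf-blue", false)); /* 0: wrong device */
    	return 0;
    }

The callers updated later in this series (anycast, tunnels, ndisc, route validation) each pick the mode that matches their question.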
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index ba2e63633370..1d6ced37ad71 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -344,7 +344,6 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
 static struct pernet_operations ipv6_addr_label_ops = {
 	.init = ip6addrlbl_net_init,
 	.exit = ip6addrlbl_net_exit,
-	.async = true,
 };
 
 int __init ipv6_addr_label_init(void)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index dbbe04018813..c1e292db04db 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -857,7 +857,6 @@ static void __net_exit inet6_net_exit(struct net *net)
 static struct pernet_operations inet6_net_ops = {
 	.init = inet6_net_init,
 	.exit = inet6_net_exit,
-	.async = true,
 };
 
 static const struct ipv6_stub ipv6_stub_impl = {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index c61718dba2e6..bbcabbba9bd8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return -EPERM;
 	if (ipv6_addr_is_multicast(addr))
 		return -EINVAL;
-	if (ipv6_chk_addr(net, addr, NULL, 0))
+
+	if (ifindex)
+		dev = __dev_get_by_index(net, ifindex);
+
+	if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
 		return -EINVAL;
 
 	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			dev = __dev_get_by_flags(net, IFF_UP,
 						 IFF_UP | IFF_LOOPBACK);
 		}
-	} else
-		dev = __dev_get_by_index(net, ifindex);
+	}
 
 	if (!dev) {
 		err = -ENODEV;
@@ -541,7 +544,7 @@ static const struct file_operations ac6_seq_fops = {
 
 int __net_init ac6_proc_init(struct net *net)
 {
-	if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
+	if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops))
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fbf08ce3f5ab..88bc2ef7c7a8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -146,10 +146,12 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
 	struct inet_sock	*inet = inet_sk(sk);
 	struct ipv6_pinfo	*np = inet6_sk(sk);
-	struct in6_addr		*daddr;
+	struct in6_addr		*daddr, old_daddr;
+	__be32			fl6_flowlabel = 0;
+	__be32			old_fl6_flowlabel;
+	__be16			old_dport;
 	int			addr_type;
 	int			err;
-	__be32			fl6_flowlabel = 0;
 
 	if (usin->sin6_family == AF_INET) {
 		if (__ipv6_only_sock(sk))
@@ -238,9 +240,13 @@ ipv4_connected:
 		}
 	}
 
+	/* save the current peer information before updating it */
+	old_daddr = sk->sk_v6_daddr;
+	old_fl6_flowlabel = np->flow_label;
+	old_dport = inet->inet_dport;
+
 	sk->sk_v6_daddr = *daddr;
 	np->flow_label = fl6_flowlabel;
-
 	inet->inet_dport = usin->sin6_port;
 
 	/*
@@ -250,11 +256,12 @@ ipv4_connected:
 
 	err = ip6_datagram_dst_update(sk, true);
 	if (err) {
-		/* Reset daddr and dport so that udp_v6_early_demux()
-		 * fails to find this socket
+		/* Restore the socket peer info, to keep it consistent with
+		 * the old socket state
 		 */
-		memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
-		inet->inet_dport = 0;
+		sk->sk_v6_daddr = old_daddr;
+		np->flow_label = old_fl6_flowlabel;
+		inet->inet_dport = old_dport;
 		goto out;
 	}
 
@@ -801,8 +808,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 			if (addr_type != IPV6_ADDR_ANY) {
 				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
 				if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
-				    !ipv6_chk_addr(net, &src_info->ipi6_addr,
-						   strict ? dev : NULL, 0) &&
+				    !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+							     dev, !strict, 0,
+							     IFA_F_TENTATIVE) &&
 				    !ipv6_chk_acast_addr_src(net, dev,
 							     &src_info->ipi6_addr))
 					err = -EINVAL;
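The __ip6_datagram_connect() error path now restores the previous peer tuple instead of zeroing it, so a failed reconnect leaves the socket usable with its old association. A small save/restore model of the fix — illustrative structure and callback, not the socket code:

    #include <stdio.h>

    struct peer {
    	char		daddr[40];
    	unsigned short	dport;
    	unsigned int	flowlabel;
    };

    /* Model of the fix: snapshot the peer tuple, attempt the update, and
     * roll back on failure instead of wiping the fields. */
    static int connect_update(struct peer *p, const struct peer *next,
    			  int (*dst_update)(const struct peer *))
    {
    	struct peer old = *p;	/* save current peer info */

    	*p = *next;
    	if (dst_update(p)) {
    		*p = old;	/* restore, don't zero */
    		return -1;
    	}
    	return 0;
    }

    static int fail_update(const struct peer *p) { (void)p; return 1; }

    int main(void)
    {
    	struct peer cur  = { "2001:db8::1", 4242, 7 };
    	struct peer next = { "2001:db8::2", 5353, 9 };

    	connect_update(&cur, &next, fail_update);
    	printf("still connected to %s:%u\n", cur.daddr, cur.dport);
    	return 0;
    }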
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 3fd1ec775dc2..27f59b61f70f 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -165,6 +165,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
 	if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
 	    (x->xso.dev != skb->dev))
 		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+	else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+		esp_features = features & ~NETIF_F_CSUM_MASK;
 
 	xo->flags |= XFRM_GSO_SEGMENT;
 
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 00ef9467f3c0..df113c7b5fc8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -397,7 +397,6 @@ static void __net_exit fib6_rules_net_exit(struct net *net)
 static struct pernet_operations fib6_rules_net_ops = {
 	.init = fib6_rules_net_init,
 	.exit = fib6_rules_net_exit,
-	.async = true,
 };
 
 int __init fib6_rules_init(void)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6f84668be6ea..d8c4b6374377 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -998,7 +998,6 @@ static void __net_exit icmpv6_sk_exit(struct net *net)
 static struct pernet_operations icmpv6_sk_ops = {
 	.init = icmpv6_sk_init,
 	.exit = icmpv6_sk_exit,
-	.async = true,
 };
 
 int __init icmpv6_init(void)
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index e438699f000f..44c39c5f0638 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -613,7 +613,6 @@ static struct pernet_operations ila_net_ops = {
 	.exit = ila_exit_net,
 	.id = &ila_net_id,
 	.size = sizeof(struct ila_net),
-	.async = true,
 };
 
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2f995e9e3050..deab2db6692e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1007,12 +1007,16 @@ add:
 		if (err)
 			return err;
 
+		err = call_fib6_entry_notifiers(info->nl_net,
+						FIB_EVENT_ENTRY_ADD,
+						rt, extack);
+		if (err)
+			return err;
+
 		rcu_assign_pointer(rt->rt6_next, iter);
 		atomic_inc(&rt->rt6i_ref);
 		rcu_assign_pointer(rt->rt6i_node, fn);
 		rcu_assign_pointer(*ins, rt);
-		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
-					  rt, extack);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
 		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1036,12 +1040,16 @@ add:
 		if (err)
 			return err;
 
+		err = call_fib6_entry_notifiers(info->nl_net,
+						FIB_EVENT_ENTRY_REPLACE,
+						rt, extack);
+		if (err)
+			return err;
+
 		atomic_inc(&rt->rt6i_ref);
 		rcu_assign_pointer(rt->rt6i_node, fn);
 		rt->rt6_next = iter->rt6_next;
 		rcu_assign_pointer(*ins, rt);
-		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
-					  rt, extack);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
 		if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -2161,7 +2169,6 @@ static void fib6_net_exit(struct net *net)
 static struct pernet_operations fib6_net_ops = {
 	.init = fib6_net_init,
 	.exit = fib6_net_exit,
-	.async = true,
 };
 
 int __init fib6_init(void)
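Moving call_fib6_entry_notifiers() ahead of the rcu_assign_pointer() calls lets a notifier (e.g. a hardware-offload driver) veto the route before it becomes visible to RCU readers, so nothing has to be unwound on failure. A generic validate-then-publish sketch of that ordering — illustrative names, not the fib code:

    #include <stdio.h>

    struct entry { int val; };

    static struct entry *published;	/* what RCU readers would see */

    /* Stand-in for the notifier chain: can reject an entry. */
    static int notify(struct entry *e)
    {
    	return e->val < 0 ? -1 : 0;	/* e.g. a driver rejects offload */
    }

    /* Model of the ip6_fib.c reordering: run the fallible notifier first,
     * publish only if every listener accepted the entry. */
    static int insert_entry(struct entry *e)
    {
    	int err = notify(e);

    	if (err)
    		return err;	/* nothing published, nothing to unwind */
    	published = e;		/* the rcu_assign_pointer() step */
    	return 0;
    }

    int main(void)
    {
    	struct entry bad = { -1 }, good = { 42 };

    	printf("bad: %d published=%p\n", insert_entry(&bad), (void *)published);
    	printf("good: %d val=%d\n", insert_entry(&good), published->val);
    	return 0;
    }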
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 6ddf52282894..c05c4e82a7ca 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -844,7 +844,7 @@ static const struct file_operations ip6fl_seq_fops = {
 
 static int __net_init ip6_flowlabel_proc_init(struct net *net)
 {
-	if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+	if (!proc_create("ip6_flowlabel", 0444, net->proc_net,
 			 &ip6fl_seq_fops))
 		return -ENOMEM;
 	return 0;
@@ -873,7 +873,6 @@ static void __net_exit ip6_flowlabel_net_exit(struct net *net)
 static struct pernet_operations ip6_flowlabel_net_ops = {
 	.init = ip6_flowlabel_proc_init,
 	.exit = ip6_flowlabel_net_exit,
-	.async = true,
 };
 
 int ip6_flowlabel_init(void)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 18a3dfbd0300..22e86557aca4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -126,7 +126,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 	struct ip6_tnl *t, *cand = NULL;
 	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 	int dev_type = (gre_proto == htons(ETH_P_TEB) ||
-			gre_proto == htons(ETH_P_ERSPAN)) ?
+			gre_proto == htons(ETH_P_ERSPAN) ||
+			gre_proto == htons(ETH_P_ERSPAN2)) ?
 		       ARPHRD_ETHER : ARPHRD_IP6GRE;
 	int score, cand_score = 4;
 
@@ -236,7 +237,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 		return t;
 
 	dev = ign->fb_tunnel_dev;
-	if (dev->flags & IFF_UP)
+	if (dev && dev->flags & IFF_UP)
 		return netdev_priv(dev);
 
 	return NULL;
@@ -724,7 +725,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		gre_build_header(skb, tunnel->tun_hlen, flags,
 				 protocol, tunnel_id_to_key32(tun_info->key.tun_id),
-				 (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
 						      : 0);
 
 	} else {
@@ -905,6 +906,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 		truncate = true;
 	}
 
+	if (skb_cow_head(skb, dev->needed_headroom))
+		goto tx_err;
+
 	t->parms.o_flags &= ~TUNNEL_KEY;
 	IPCB(skb)->flags = 0;
 
@@ -947,6 +951,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 					    md->u.md2.dir,
 					    get_hwid(&md->u.md2),
 					    truncate, false);
+		} else {
+			goto tx_err;
 		}
 	} else {
 		switch (skb->protocol) {
@@ -1472,6 +1478,8 @@ static int __net_init ip6gre_init_net(struct net *net)
 	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 	int err;
 
+	if (!net_has_fallback_tunnels(net))
+		return 0;
 	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
 					  NET_NAME_UNKNOWN,
 					  ip6gre_tunnel_setup);
@@ -1520,7 +1528,6 @@ static struct pernet_operations ip6gre_net_ops = {
 	.exit_batch = ip6gre_exit_batch_net,
 	.id   = &ip6gre_net_id,
 	.size = sizeof(struct ip6gre_net),
-	.async = true,
 };
 
 static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
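The one-character __gre6_xmit() fix deserves a note: `flags | TUNNEL_SEQ` is always non-zero, so the buggy code stamped a sequence number on every packet; `flags & TUNNEL_SEQ` actually tests the bit. A minimal demonstration of the operator difference (the bit value here is illustrative, not the kernel's TUNNEL_SEQ definition):

    #include <stdio.h>

    #define TUNNEL_SEQ 0x20	/* illustrative bit value only */

    int main(void)
    {
    	unsigned int flags = 0;	/* sequence numbers NOT requested */

    	/* Buggy form: OR makes the condition always true. */
    	printf("flags | TUNNEL_SEQ -> %s\n",
    	       (flags | TUNNEL_SEQ) ? "seq added (wrong)" : "no seq");

    	/* Fixed form: AND tests whether the flag is actually set. */
    	printf("flags & TUNNEL_SEQ -> %s\n",
    	       (flags & TUNNEL_SEQ) ? "seq added" : "no seq (right)");
    	return 0;
    }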
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a6eb0e699b15..2c7f09c3c39e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -412,7 +412,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 	if (skb->ignore_df)
 		return false;
 
-	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 		return false;
 
 	return true;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1124f310df5a..df4c29f7d59f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 			ldev = dev_get_by_index_rcu(net, p->link);
 
 		if ((ipv6_addr_is_multicast(laddr) ||
-		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+		     likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+						    0, IFA_F_TENTATIVE))) &&
 		    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
-		     likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+		     likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+						     0, IFA_F_TENTATIVE))))
 			ret = 1;
 	}
 	return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
 		if (p->link)
 			ldev = dev_get_by_index_rcu(net, p->link);
 
-		if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+		if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+						      0, IFA_F_TENTATIVE)))
 			pr_warn("%s xmit: Local address not yet configured!\n",
 				p->name);
 		else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
 			 !ipv6_addr_is_multicast(raddr) &&
-			 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+			 unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+							  true, 0, IFA_F_TENTATIVE)))
 			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
 				p->name);
 		else
@@ -1982,14 +1986,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct net *net = dev_net(dev);
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-	struct ip6_tnl *nt, *t;
 	struct ip_tunnel_encap ipencap;
+	struct ip6_tnl *nt, *t;
+	int err;
 
 	nt = netdev_priv(dev);
 
 	if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
-		int err = ip6_tnl_encap_setup(nt, &ipencap);
-
+		err = ip6_tnl_encap_setup(nt, &ipencap);
 		if (err < 0)
 			return err;
 	}
@@ -2005,7 +2009,11 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 			return -EEXIST;
 	}
 
-	return ip6_tnl_create2(dev);
+	err = ip6_tnl_create2(dev);
+	if (!err && tb[IFLA_MTU])
+		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
+	return err;
 }
 
 static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -2201,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
 	ip6n->tnls[0] = ip6n->tnls_wc;
 	ip6n->tnls[1] = ip6n->tnls_r_l;
 
+	if (!net_has_fallback_tunnels(net))
+		return 0;
 	err = -ENOMEM;
 	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
 					NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
@@ -2250,7 +2260,6 @@ static struct pernet_operations ip6_tnl_net_ops = {
 	.exit_batch = ip6_tnl_exit_batch_net,
 	.id   = &ip6_tnl_net_id,
 	.size = sizeof(struct ip6_tnl_net),
-	.async = true,
 };
 
 /**
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index a482b854eeea..60b771f49fb5 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1148,7 +1148,6 @@ static struct pernet_operations vti6_net_ops = {
 	.exit_batch = vti6_exit_batch_net,
 	.id   = &vti6_net_id,
 	.size = sizeof(struct vti6_net),
-	.async = true,
 };
 
 static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2a38f9de45d3..298fd8b6ed17 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -258,6 +258,23 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
 	rtnl_unlock();
 }
+
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+}
+
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
+}
+
+bool ip6mr_rule_default(const struct fib_rule *rule)
+{
+	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
+	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
+}
+EXPORT_SYMBOL(ip6mr_rule_default);
 #else
 #define ip6mr_for_each_table(mrt, net) \
 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
@@ -295,6 +312,16 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 	net->ipv6.mrt6 = NULL;
 	rtnl_unlock();
 }
+
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return 0;
+}
+
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+	return 0;
+}
 #endif
 
 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -653,10 +680,25 @@ failure:
 }
 #endif
 
-/*
- *	Delete a VIF entry
- */
+static int call_ip6mr_vif_entry_notifiers(struct net *net,
+					  enum fib_event_type event_type,
+					  struct vif_device *vif,
+					  mifi_t vif_index, u32 tb_id)
+{
+	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+				     vif, vif_index, tb_id,
+				     &net->ipv6.ipmr_seq);
+}
 
+static int call_ip6mr_mfc_entry_notifiers(struct net *net,
+					  enum fib_event_type event_type,
+					  struct mfc6_cache *mfc, u32 tb_id)
+{
+	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
+}
+
+/* Delete a VIF entry */
 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 		       struct list_head *head)
 {
@@ -669,6 +711,11 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 
 	v = &mrt->vif_table[vifi];
 
+	if (VIF_EXISTS(mrt, vifi))
+		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
+					       FIB_EVENT_VIF_DEL, v, vifi,
+					       mrt->id);
+
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
 	v->dev = NULL;
@@ -887,6 +934,8 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
 	if (vifi + 1 > mrt->maxvif)
 		mrt->maxvif = vifi + 1;
 	write_unlock_bh(&mrt_lock);
+	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
+				       v, vifi, mrt->id);
 	return 0;
 }
 
@@ -940,6 +989,8 @@ static struct mfc6_cache *ip6mr_cache_alloc(void)
 		return NULL;
 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 	c->_c.mfc_un.res.minvif = MAXMIFS;
+	c->_c.free = ip6mr_cache_free_rcu;
+	refcount_set(&c->_c.mfc_un.res.refcount, 1);
 	return c;
 }
 
@@ -1175,8 +1226,10 @@ static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
 	list_del_rcu(&c->_c.list);
 
+	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
-	ip6mr_cache_free(c);
+	mr_cache_put(&c->_c);
 	return 0;
 }
 
@@ -1203,21 +1256,63 @@ static int ip6mr_device_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
+static unsigned int ip6mr_seq_read(struct net *net)
+{
+	ASSERT_RTNL();
+
+	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+}
+
+static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+{
+	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
+		       ip6mr_mr_table_iter, &mrt_lock);
+}
+
 static struct notifier_block ip6_mr_notifier = {
 	.notifier_call = ip6mr_device_event
 };
 
-/*
- *	Setup for IP multicast routing
- */
+static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
+	.family		= RTNL_FAMILY_IP6MR,
+	.fib_seq_read	= ip6mr_seq_read,
+	.fib_dump	= ip6mr_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __net_init ip6mr_notifier_init(struct net *net)
+{
+	struct fib_notifier_ops *ops;
+
+	net->ipv6.ipmr_seq = 0;
+
+	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+
+	net->ipv6.ip6mr_notifier_ops = ops;
+
+	return 0;
+}
+
+static void __net_exit ip6mr_notifier_exit(struct net *net)
+{
+	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
+	net->ipv6.ip6mr_notifier_ops = NULL;
+}
+
+/* Setup for IP multicast routing */
 static int __net_init ip6mr_net_init(struct net *net)
 {
 	int err;
 
+	err = ip6mr_notifier_init(net);
+	if (err)
+		return err;
+
 	err = ip6mr_rules_init(net);
 	if (err < 0)
-		goto fail;
+		goto ip6mr_rules_fail;
 
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
@@ -1235,7 +1330,8 @@ proc_cache_fail:
 proc_vif_fail:
 	ip6mr_rules_exit(net);
 #endif
-fail:
+ip6mr_rules_fail:
+	ip6mr_notifier_exit(net);
 	return err;
 }
 
@@ -1246,12 +1342,12 @@ static void __net_exit ip6mr_net_exit(struct net *net)
 	remove_proc_entry("ip6_mr_vif", net->proc_net);
 #endif
 	ip6mr_rules_exit(net);
+	ip6mr_notifier_exit(net);
 }
 
 static struct pernet_operations ip6mr_net_ops = {
 	.init = ip6mr_net_init,
 	.exit = ip6mr_net_exit,
-	.async = true,
 };
 
 int __init ip6_mr_init(void)
@@ -1337,6 +1433,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 		if (!mrtsock)
 			c->_c.mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
+		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+					       c, mrt->id);
 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 		return 0;
 	}
@@ -1388,6 +1486,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 		ip6mr_cache_resolve(net, mrt, uc, c);
 		ip6mr_cache_free(uc);
 	}
+	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
+				       c, mrt->id);
 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 	return 0;
 }
@@ -1417,13 +1517,17 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
 			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
 			list_del_rcu(&c->list);
 			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
-			ip6mr_cache_free((struct mfc6_cache *)c);
+			mr_cache_put(c);
 		}
 
 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
 		spin_lock_bh(&mfc_unres_lock);
 		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 			list_del(&c->list);
+			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+						       FIB_EVENT_ENTRY_DEL,
+						       (struct mfc6_cache *)c,
+						       mrt->id);
 			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
 					  RTM_DELROUTE);
 			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
@@ -1443,6 +1547,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
 		err = -EADDRINUSE;
 	} else {
 		rcu_assign_pointer(mrt->mroute_sk, sk);
+		sock_set_flag(sk, SOCK_RCU_FREE);
 		net->ipv6.devconf_all->mc_forwarding++;
 	}
 	write_unlock_bh(&mrt_lock);
@@ -1472,6 +1577,10 @@ int ip6mr_sk_done(struct sock *sk)
 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
 			write_lock_bh(&mrt_lock);
 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
+			/* Note that mroute_sk had SOCK_RCU_FREE set,
+			 * so the RCU grace period before sk freeing
+			 * is guaranteed by sk_destruct()
+			 */
 			net->ipv6.devconf_all->mc_forwarding--;
 			write_unlock_bh(&mrt_lock);
 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1485,7 +1594,6 @@ int ip6mr_sk_done(struct sock *sk)
 		}
 	}
 	rtnl_unlock();
-	synchronize_rcu();
 
 	return err;
 }
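Dropping the synchronize_rcu() from ip6mr_sk_done() works because SOCK_RCU_FREE makes the socket's own free path wait out the grace period, instead of blocking every caller. A hedged kernel-style sketch of the same deferred-free idea on a hypothetical structure — not the socket code itself:

    // SPDX-License-Identifier: GPL-2.0
    /* Sketch of the SOCK_RCU_FREE idea: rather than blocking in
     * synchronize_rcu() on teardown, free the object via call_rcu()
     * after the grace period. Hypothetical demo structure. */
    #include <linux/slab.h>
    #include <linux/rcupdate.h>

    struct demo_obj {
    	int data;
    	struct rcu_head rcu;
    };

    static struct demo_obj __rcu *demo_ptr;

    static void demo_free_rcu(struct rcu_head *head)
    {
    	kfree(container_of(head, struct demo_obj, rcu));
    }

    static void demo_teardown(void)
    {
    	struct demo_obj *obj;

    	obj = rcu_dereference_protected(demo_ptr, 1); /* caller holds the lock */
    	RCU_INIT_POINTER(demo_ptr, NULL);
    	if (obj)
    		call_rcu(&obj->rcu, demo_free_rcu); /* no blocking wait here */
    }

The same refcount-plus-RCU shape shows up in the mfc cache conversion above: ip6mr_cache_free() becomes mr_cache_put(), with ip6mr_cache_free_rcu as the deferred destructor.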
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d1a0cefac273..793159d77d8a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2921,9 +2921,9 @@ static int __net_init igmp6_proc_init(struct net *net)
 	int err;
 
 	err = -ENOMEM;
-	if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
+	if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops))
 		goto out;
-	if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+	if (!proc_create("mcfilter6", 0444, net->proc_net,
 			 &igmp6_mcf_seq_fops))
 		goto out_proc_net_igmp6;
 
@@ -2997,7 +2997,6 @@ static void __net_exit igmp6_net_exit(struct net *net)
 static struct pernet_operations igmp6_net_ops = {
 	.init = igmp6_net_init,
 	.exit = igmp6_net_exit,
-	.async = true,
 };
 
 int __init igmp6_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0a19ce3a6f7f..9de4dfb126ba 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
 	}
 
 	if (!dev->addr_len)
-		inc_opt = 0;
+		inc_opt = false;
 	if (inc_opt)
 		optlen += ndisc_opt_addr_space(dev,
 					       NDISC_NEIGHBOUR_ADVERTISEMENT);
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	int probes = atomic_read(&neigh->probes);
 
 	if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
-					   dev, 1,
+					   dev, false, 1,
 					   IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
 		saddr = &ipv6_hdr(skb)->saddr;
 	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
@@ -1554,7 +1554,8 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
 	*(opt++) = (rd_len >> 3);
 	opt += 6;
 
-	memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
+	skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
+		      rd_len - 8);
 }
 
 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
@@ -1882,7 +1883,6 @@ static void __net_exit ndisc_net_exit(struct net *net)
 static struct pernet_operations ndisc_net_ops = {
 	.init = ndisc_net_init,
 	.exit = ndisc_net_exit,
-	.async = true,
 };
 
 int __init ndisc_init(void)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d95ceca7ff8f..531d6957af36 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -21,18 +21,19 @@
 int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct sock *sk = sk_to_full_sk(skb->sk);
 	unsigned int hh_len;
 	struct dst_entry *dst;
 	struct flowi6 fl6 = {
-		.flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+		.flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
 		.flowi6_mark = skb->mark,
-		.flowi6_uid = sock_net_uid(net, skb->sk),
+		.flowi6_uid = sock_net_uid(net, sk),
 		.daddr = iph->daddr,
 		.saddr = iph->saddr,
 	};
 	int err;
 
-	dst = ip6_route_output(net, skb->sk, &fl6);
+	dst = ip6_route_output(net, sk, &fl6);
 	err = dst->error;
 	if (err) {
 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
@@ -50,7 +51,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
 	    xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
 		skb_dst_set(skb, NULL);
-		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
+		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
 		if (IS_ERR(dst))
 			return PTR_ERR(dst);
 		skb_dst_set(skb, dst);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 3c36a4c77f29..65c9e1a58305 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1921,7 +1921,6 @@ static void __net_exit ip6_tables_net_exit(struct net *net)
 static struct pernet_operations ip6_tables_net_ops = {
 	.init = ip6_tables_net_init,
 	.exit = ip6_tables_net_exit,
-	.async = true,
 };
 
 static int __init ip6_tables_init(void)
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 910a27318f58..d12f511929f5 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -48,10 +48,6 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
 	}
 
 	fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
-	if ((flags & XT_RPFILTER_LOOSE) == 0) {
-		fl6.flowi6_oif = dev->ifindex;
-		lookup_flags |= RT6_LOOKUP_F_IFACE;
-	}
 
 	rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
 	if (rt->dst.error)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 32f98bc06900..c87b48359e8f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -103,7 +103,6 @@ static void __net_exit defrag6_net_exit(struct net *net)
 
 static struct pernet_operations defrag6_net_ops = {
 	.exit = defrag6_net_exit,
-	.async = true,
 };
 
 static int __init nf_defrag_init(void)
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index d346705d6ee6..207cb35569b1 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -178,7 +178,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	if (skb->len <= mtu)
 		return false;
 
-	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 		return false;
 
 	return true;
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 0220e584589c..b397a8fe88b9 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -390,7 +390,6 @@ static void __net_exit nf_log_ipv6_net_exit(struct net *net)
 static struct pernet_operations nf_log_ipv6_net_ops = {
 	.init = nf_log_ipv6_net_init,
 	.exit = nf_log_ipv6_net_exit,
-	.async = true,
 };
 
 static int __init nf_log_ipv6_init(void)
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index bed57ee65f7b..6b7f075f811f 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -99,6 +99,10 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
 	    !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
 				target, maniptype))
 		return false;
+
+	/* must reload, offset might have changed */
+	ipv6h = (void *)skb->data + iphdroff;
+
 manip_addr:
 	if (maniptype == NF_NAT_MANIP_SRC)
 		ipv6h->saddr = target->src.u3.in6;
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 3230b3d7b11b..36be3cf0adef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -180,7 +180,6 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 	}
 
 	*dest = 0;
-again:
 	rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
 				      lookup_flags);
 	if (rt->dst.error)
@@ -190,15 +189,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 	if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
 		goto put_rt_err;
 
-	if (oif && oif != rt->rt6i_idev->dev) {
-		/* multipath route? Try again with F_IFACE */
-		if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
-			lookup_flags |= RT6_LOOKUP_F_IFACE;
-			fl6.flowi6_oif = oif->ifindex;
-			ip6_rt_put(rt);
-			goto again;
-		}
-	}
+	if (oif && oif != rt->rt6i_idev->dev)
+		goto put_rt_err;
 
 	switch (priv->result) {
 	case NFT_FIB_RESULT_OIF:
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 318c6e914234..d12c55dad7d1 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -240,7 +240,6 @@ static void __net_init ping_v6_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v6_net_ops = {
 	.init = ping_v6_proc_init_net,
 	.exit = ping_v6_proc_exit_net,
-	.async = true,
 };
#endif
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 1678cf037688..6e57028d2e91 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -290,7 +290,7 @@ int snmp6_register_dev(struct inet6_dev *idev)
 	if (!net->mib.proc_net_devsnmp6)
 		return -ENOENT;
 
-	p = proc_create_data(idev->dev->name, S_IRUGO,
+	p = proc_create_data(idev->dev->name, 0444,
 			     net->mib.proc_net_devsnmp6,
 			     &snmp6_dev_seq_fops, idev);
 	if (!p)
@@ -314,11 +314,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
 
 static int __net_init ipv6_proc_init_net(struct net *net)
 {
-	if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+	if (!proc_create("sockstat6", 0444, net->proc_net,
 			 &sockstat6_seq_fops))
 		return -ENOMEM;
 
-	if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
+	if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops))
 		goto proc_snmp6_fail;
 
 	net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
@@ -343,7 +343,6 @@ static void __net_exit ipv6_proc_exit_net(struct net *net)
 static struct pernet_operations ipv6_proc_ops = {
 	.init = ipv6_proc_init_net,
 	.exit = ipv6_proc_exit_net,
-	.async = true,
 };
 
 int __init ipv6_misc_proc_init(void)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 10a4ac4933b7..5eb9b08947ed 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1318,7 +1318,7 @@ static const struct file_operations raw6_seq_fops = {
 
 static int __net_init raw6_init_net(struct net *net)
 {
-	if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
+	if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops))
 		return -ENOMEM;
 
 	return 0;
@@ -1332,7 +1332,6 @@ static void __net_exit raw6_exit_net(struct net *net)
 static struct pernet_operations raw6_net_ops = {
 	.init = raw6_init_net,
 	.exit = raw6_exit_net,
-	.async = true,
 };
 
 int __init raw6_proc_init(void)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index b5da69c83123..08a139f14d0f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -650,10 +650,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 		table[1].data = &net->ipv6.frags.low_thresh;
 		table[1].extra2 = &net->ipv6.frags.high_thresh;
 		table[2].data = &net->ipv6.frags.timeout;
-
-		/* Don't export sysctls to unprivileged users */
-		if (net->user_ns != &init_user_ns)
-			table[0].procname = NULL;
 	}
 
 	hdr = register_net_sysctl(net, "net/ipv6", table);
@@ -733,7 +729,6 @@ static void __net_exit ipv6_frags_exit_net(struct net *net)
 static struct pernet_operations ip6_frags_ops = {
 	.init = ipv6_frags_init_net,
 	.exit = ipv6_frags_exit_net,
-	.async = true,
 };
 
 int __init ipv6_frag_init(void)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f0ae58424c45..ba8d5df50ebe 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -128,7 +128,7 @@ struct uncached_list {
 
 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
rt6_uncached_list_add(struct rt6_info *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); @@ -139,7 +139,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt) spin_unlock_bh(&ul->lock); } -static void rt6_uncached_list_del(struct rt6_info *rt) +void rt6_uncached_list_del(struct rt6_info *rt) { if (!list_empty(&rt->rt6i_uncached)) { struct uncached_list *ul = rt->rt6i_uncached_list; @@ -1514,7 +1514,30 @@ static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) } } -static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) +static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, + struct rt6_info *rt, int mtu) +{ + /* If the new MTU is lower than the route PMTU, this new MTU will be the + * lowest MTU in the path: always allow updating the route PMTU to + * reflect PMTU decreases. + * + * If the new MTU is higher, and the route PMTU is equal to the local + * MTU, this means the old MTU is the lowest in the path, so allow + * updating it: if other nodes now have lower MTUs, PMTU discovery will + * handle this. + */ + + if (dst_mtu(&rt->dst) >= mtu) + return true; + + if (dst_mtu(&rt->dst) == idev->cnf.mtu6) + return true; + + return false; +} + +static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, + struct rt6_info *rt, int mtu) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1523,20 +1546,22 @@ static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, lockdep_is_held(&rt6_exception_lock)); - if (bucket) { - for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { - hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { - struct rt6_info *entry = rt6_ex->rt6i; - /* For RTF_CACHE with rt6i_pmtu == 0 - * (i.e. a redirected route), - * the metrics of its rt->dst.from has already - * been updated. - */ - if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu) - entry->rt6i_pmtu = mtu; - } - bucket++; + if (!bucket) + return; + + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { + struct rt6_info *entry = rt6_ex->rt6i; + + /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected + * route), the metrics of its rt->dst.from have already + * been updated. + */ + if (entry->rt6i_pmtu && + rt6_mtu_change_route_allowed(idev, entry, mtu)) + entry->rt6i_pmtu = mtu; } + bucket++; } } @@ -1846,7 +1871,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, struct flow_keys hash_keys; u32 mhash; - switch (net->ipv6.sysctl.multipath_hash_policy) { + switch (ip6_multipath_hash_policy(net)) { case 0: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; @@ -2550,7 +2575,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, static int ip6_route_check_nh_onlink(struct net *net, struct fib6_config *cfg, - struct net_device *dev, + const struct net_device *dev, struct netlink_ext_ack *extack) { u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; @@ -2626,6 +2651,79 @@ out: return err; } +static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, + struct net_device **_dev, struct inet6_dev **idev, + struct netlink_ext_ack *extack) +{ + const struct in6_addr *gw_addr = &cfg->fc_gateway; + int gwa_type = ipv6_addr_type(gw_addr); + bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? 
false : true; + const struct net_device *dev = *_dev; + bool need_addr_check = !dev; + int err = -EINVAL; + + /* if gw_addr is local we will fail to detect this in case + * address is still TENTATIVE (DAD in progress). rt6_lookup() + * will return already-added prefix route via interface that + * prefix route was assigned to, which might be non-loopback. + */ + if (dev && + ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { + NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); + goto out; + } + + if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) { + /* IPv6 strictly inhibits using not link-local + * addresses as nexthop address. + * Otherwise, router will not able to send redirects. + * It is very good, but in some (rare!) circumstances + * (SIT, PtP, NBMA NOARP links) it is handy to allow + * some exceptions. --ANK + * We allow IPv4-mapped nexthops to support RFC4798-type + * addressing + */ + if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) { + NL_SET_ERR_MSG(extack, "Invalid gateway address"); + goto out; + } + + if (cfg->fc_flags & RTNH_F_ONLINK) + err = ip6_route_check_nh_onlink(net, cfg, dev, extack); + else + err = ip6_route_check_nh(net, cfg, _dev, idev); + + if (err) + goto out; + } + + /* reload in case device was changed */ + dev = *_dev; + + err = -EINVAL; + if (!dev) { + NL_SET_ERR_MSG(extack, "Egress device not specified"); + goto out; + } else if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, + "Egress device can not be loopback device for this route"); + goto out; + } + + /* if we did not check gw_addr above, do so now that the + * egress device has been resolved. + */ + if (need_addr_check && + ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { + NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); + goto out; + } + + err = 0; +out: + return err; +} + static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -2808,61 +2906,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, } if (cfg->fc_flags & RTF_GATEWAY) { - const struct in6_addr *gw_addr; - int gwa_type; - - gw_addr = &cfg->fc_gateway; - gwa_type = ipv6_addr_type(gw_addr); - - /* if gw_addr is local we will fail to detect this in case - * address is still TENTATIVE (DAD in progress). rt6_lookup() - * will return already-added prefix route via interface that - * prefix route was assigned to, which might be non-loopback. - */ - err = -EINVAL; - if (ipv6_chk_addr_and_flags(net, gw_addr, - gwa_type & IPV6_ADDR_LINKLOCAL ? - dev : NULL, 0, 0)) { - NL_SET_ERR_MSG(extack, "Invalid gateway address"); + err = ip6_validate_gw(net, cfg, &dev, &idev, extack); + if (err) goto out; - } - rt->rt6i_gateway = *gw_addr; - - if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { - /* IPv6 strictly inhibits using not link-local - addresses as nexthop address. - Otherwise, router will not able to send redirects. - It is very good, but in some (rare!) circumstances - (SIT, PtP, NBMA NOARP links) it is handy to allow - some exceptions. 
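
The new ip6_validate_gw() helper above is easiest to read as a three-step sequence: check the gateway against local addresses when a device is already known, resolve the egress device (onlink check or full nexthop lookup), then repeat the locality check if it had to be skipped the first time. A minimal user-space sketch of that ordering follows; the stub predicates stand in for ipv6_chk_addr_and_flags() and the ip6_route_check_nh*() helpers and are not real kernel calls.

#include <stdbool.h>
#include <stdio.h>

struct dev { const char *name; bool loopback; };

static bool gw_is_local(const struct dev *dev)
{
	(void)dev;
	return false;	/* stub for ipv6_chk_addr_and_flags() */
}

static struct dev *resolve_egress(void)
{
	static struct dev d = { "eth0", false };
	return &d;	/* stub for ip6_route_check_nh() */
}

static int validate_gw(struct dev *dev)
{
	bool need_addr_check = !dev;

	/* 1: with a device already known, reject a gateway that is one
	 *    of our own addresses before any route lookup */
	if (dev && gw_is_local(dev))
		return -1;

	/* 2: resolve the egress device if the caller did not name one */
	if (!dev)
		dev = resolve_egress();
	if (!dev || dev->loopback)
		return -1;

	/* 3: the locality check was skipped in step 1, so redo it now
	 *    that the egress device is known */
	if (need_addr_check && gw_is_local(dev))
		return -1;

	return 0;
}

int main(void)
{
	printf("valid: %d\n", validate_gw(NULL));
	return 0;
}
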
--ANK - We allow IPv4-mapped nexthops to support RFC4798-type - addressing - */ - if (!(gwa_type & (IPV6_ADDR_UNICAST | - IPV6_ADDR_MAPPED))) { - NL_SET_ERR_MSG(extack, - "Invalid gateway address"); - goto out; - } - if (cfg->fc_flags & RTNH_F_ONLINK) { - err = ip6_route_check_nh_onlink(net, cfg, dev, - extack); - } else { - err = ip6_route_check_nh(net, cfg, &dev, &idev); - } - if (err) - goto out; - } - err = -EINVAL; - if (!dev) { - NL_SET_ERR_MSG(extack, "Egress device not specified"); - goto out; - } else if (dev->flags & IFF_LOOPBACK) { - NL_SET_ERR_MSG(extack, - "Egress device can not be loopback device for this route"); - goto out; - } + rt->rt6i_gateway = cfg->fc_gateway; } err = -ENODEV; @@ -3876,25 +3924,13 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - /* - If new MTU is less than route PMTU, this new MTU will be the - lowest MTU in the path, update the route PMTU to reflect PMTU - decreases; if new MTU is greater than route PMTU, and the - old MTU is the lowest MTU in the path, update the route PMTU - to reflect the increase. In this case if the other nodes' MTU - also have the lowest MTU, TOO BIG MESSAGE will be lead to - PMTU discovery. - */ if (rt->dst.dev == arg->dev && - dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { spin_lock_bh(&rt6_exception_lock); - if (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + if (dst_metric_raw(&rt->dst, RTAX_MTU) && + rt6_mtu_change_route_allowed(idev, rt, arg->mtu)) dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); - } - rt6_exceptions_update_pmtu(rt, arg->mtu); + rt6_exceptions_update_pmtu(idev, rt, arg->mtu); spin_unlock_bh(&rt6_exception_lock); } return 0; @@ -4166,6 +4202,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, r_cfg.fc_encap_type = nla_get_u16(nla); } + r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); rt = ip6_route_info_create(&r_cfg, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); @@ -5030,7 +5067,7 @@ static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); - proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); + proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops); #endif return 0; } @@ -5046,7 +5083,6 @@ static void __net_exit ip6_route_net_exit_late(struct net *net) static struct pernet_operations ip6_route_net_ops = { .init = ip6_route_net_init, .exit = ip6_route_net_exit, - .async = true, }; static int __net_init ipv6_inetpeer_init(struct net *net) @@ -5072,13 +5108,11 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net) static struct pernet_operations ipv6_inetpeer_ops = { .init = ipv6_inetpeer_init, .exit = ipv6_inetpeer_exit, - .async = true, }; static struct pernet_operations ip6_route_net_late_ops = { .init = ip6_route_net_init_late, .exit = ip6_route_net_exit_late, - .async = true, }; static struct notifier_block ip6_route_dev_notifier = { diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index c3f13c3bd8a9..7f5621d09571 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -395,7 +395,6 @@ static void __net_exit seg6_net_exit(struct net *net) static struct pernet_operations ip6_segments_ops = { .init = seg6_net_init, .exit = seg6_net_exit, - .async = true, }; static const struct genl_ops seg6_genl_ops[] = { diff --git 
a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index bd6cc688bd19..7a78dcfda68a 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -93,7 +93,8 @@ static void set_tun_src(struct net *net, struct net_device *dev, /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(dst->dev); struct ipv6hdr *hdr, *inner_hdr; struct ipv6_sr_hdr *isrh; int hdrlen, tot_len, err; @@ -134,7 +135,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { @@ -418,7 +419,7 @@ static int seg6_build_state(struct nlattr *nla, slwt = seg6_lwt_lwtunnel(newts); - err = dst_cache_init(&slwt->cache, GFP_KERNEL); + err = dst_cache_init(&slwt->cache, GFP_ATOMIC); if (err) { kfree(newts); return err; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 182db078f01e..1522bcfd253f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel *t = netdev_priv(dev); - if (dev == sitn->fb_tunnel_dev) { + if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) { ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); t->ip6rd.relay_prefix = 0; t->ip6rd.prefixlen = 16; @@ -1578,6 +1578,13 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); + + if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len) + dev->mtu = mtu; + } + #ifdef CONFIG_IPV6_SIT_6RD if (ipip6_netlink_6rd_parms(data, &ip6rd)) err = ipip6_tunnel_update_6rd(nt, &ip6rd); @@ -1828,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net) sitn->tunnels[2] = sitn->tunnels_r; sitn->tunnels[3] = sitn->tunnels_r_l; + if (!net_has_fallback_tunnels(net)) + return 0; + sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", NET_NAME_UNKNOWN, ipip6_tunnel_setup); @@ -1878,7 +1888,6 @@ static struct pernet_operations sit_net_ops = { .exit_batch = sit_exit_batch_net, .id = &sit_net_id, .size = sizeof(struct sit_net), - .async = true, }; static void __exit sit_cleanup(void) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 966c42af92f4..6fbdef630152 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -278,7 +278,6 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net) static struct pernet_operations ipv6_sysctl_net_ops = { .init = ipv6_sysctl_net_init, .exit = ipv6_sysctl_net_exit, - .async = true, }; static struct ctl_table_header *ip6_header; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5425d7b100ee..883df0ad5bfe 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2007,7 +2007,6 @@ static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, .exit_batch = tcpv6_net_exit_batch, - .async = true, }; int __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 52e3ea0e6f50..ad30f5e31969 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1509,34 +1509,34 @@ void 
udp6_proc_exit(struct net *net) /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { - .name = "UDPv6", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip6_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udp_init_sock, - .destroy = udpv6_destroy_sock, - .setsockopt = udpv6_setsockopt, - .getsockopt = udpv6_getsockopt, - .sendmsg = udpv6_sendmsg, - .recvmsg = udpv6_recvmsg, - .release_cb = ip6_datagram_release_cb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .rehash = udp_v6_rehash, - .get_port = udp_v6_get_port, - .memory_allocated = &udp_memory_allocated, - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem = &sysctl_udp_wmem_min, - .sysctl_rmem = &sysctl_udp_rmem_min, - .obj_size = sizeof(struct udp6_sock), - .h.udp_table = &udp_table, + .name = "UDPv6", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udp_init_sock, + .destroy = udpv6_destroy_sock, + .setsockopt = udpv6_setsockopt, + .getsockopt = udpv6_getsockopt, + .sendmsg = udpv6_sendmsg, + .recvmsg = udpv6_recvmsg, + .release_cb = ip6_datagram_release_cb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, + .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), + .obj_size = sizeof(struct udp6_sock), + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udpv6_setsockopt, - .compat_getsockopt = compat_udpv6_getsockopt, + .compat_setsockopt = compat_udpv6_setsockopt, + .compat_getsockopt = compat_udpv6_getsockopt, #endif - .diag_destroy = udp_abort, + .diag_destroy = udp_abort, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f3839780dc31..14ae32bb1f3d 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -123,7 +123,6 @@ static void __net_exit udplite6_proc_exit_net(struct net *net) static struct pernet_operations udplite6_net_ops = { .init = udplite6_proc_init_net, .exit = udplite6_proc_exit_net, - .async = true, }; int __init udplite6_proc_init(void) diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index bb935a3b7fea..de1b0b8c53b0 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -92,7 +92,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); - eth_hdr(skb)->h_proto = skb->protocol; + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; err = 0; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8ae87d4ec5ff..5959ce9620eb 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -82,7 +82,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if ((!skb_is_gso(skb) && skb->len > mtu) || (skb_is_gso(skb) && - skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) { + !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 88cd0c90fa81..416fe67271a9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -113,6 +113,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, 
xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; + INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); + rt6_uncached_list_add(&xdst->u.rt6); + atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache); return 0; } @@ -244,6 +247,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); + if (xdst->u.rt6.rt6i_uncached_list) + rt6_uncached_list_del(&xdst->u.rt6); xfrm_dst_destroy(xdst); } @@ -395,7 +400,6 @@ static void __net_exit xfrm6_net_exit(struct net *net) static struct pernet_operations xfrm6_net_ops = { .init = xfrm6_net_init, .exit = xfrm6_net_exit, - .async = true, }; int __init xfrm6_init(void) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index a9673619e0e9..f85f0d7480ac 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -353,7 +353,6 @@ static struct pernet_operations xfrm6_tunnel_net_ops = { .exit = xfrm6_tunnel_net_exit, .id = &xfrm6_tunnel_net_id, .size = sizeof(struct xfrm6_tunnel_net), - .async = true, }; static int __init xfrm6_tunnel_init(void) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 81ce15ffb878..893a022f9620 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2432,9 +2432,11 @@ static int afiucv_iucv_init(void) af_iucv_dev->driver = &af_iucv_driver; err = device_register(af_iucv_dev); if (err) - goto out_driver; + goto out_iucv_dev; return 0; +out_iucv_dev: + put_device(af_iucv_dev); out_driver: driver_unregister(&af_iucv_driver); out_iucv: diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 2c1c8b3e4452..1fac92543094 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -269,7 +269,7 @@ static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo) struct proc_dir_entry *p; int rc = 0; - p = proc_create_data(muxinfo->name, S_IRUGO, net->proc_net, + p = proc_create_data(muxinfo->name, 0444, net->proc_net, muxinfo->seq_fops, muxinfo); if (!p) rc = -ENOMEM; @@ -406,7 +406,7 @@ static int kcm_proc_init_net(struct net *net) { int err; - if (!proc_create("kcm_stats", S_IRUGO, net->proc_net, + if (!proc_create("kcm_stats", 0444, net->proc_net, &kcm_stats_seq_fops)) { err = -ENOMEM; goto out_kcm_stats; @@ -433,7 +433,6 @@ static void kcm_proc_exit_net(struct net *net) static struct pernet_operations kcm_net_ops = { .init = kcm_proc_init_net, .exit = kcm_proc_exit_net, - .async = true, }; int __init kcm_proc_init(void) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a6cd0712e063..dc76bc346829 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1381,24 +1381,32 @@ static int kcm_attach(struct socket *sock, struct socket *csock, .parse_msg = kcm_parse_func_strparser, .read_sock_done = kcm_read_sock_done, }; - int err; + int err = 0; csk = csock->sk; if (!csk) return -EINVAL; + lock_sock(csk); + /* Only allow TCP sockets to be attached for now */ if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || - csk->sk_protocol != IPPROTO_TCP) - return -EOPNOTSUPP; + csk->sk_protocol != IPPROTO_TCP) { + err = -EOPNOTSUPP; + goto out; + } /* Don't allow listeners or closed sockets */ - if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) - return -EOPNOTSUPP; + if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) { + err = -EOPNOTSUPP; + goto out; + } psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); - if (!psock) - return -ENOMEM; + if (!psock) { + err = -ENOMEM; + goto out; + } psock->mux = 
mux; psock->sk = csk; @@ -1407,7 +1415,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, err = strp_init(&psock->strp, csk, &cb); if (err) { kmem_cache_free(kcm_psockp, psock); - return err; + goto out; } write_lock_bh(&csk->sk_callback_lock); @@ -1420,7 +1428,8 @@ static int kcm_attach(struct socket *sock, struct socket *csock, strp_stop(&psock->strp); strp_done(&psock->strp); kmem_cache_free(kcm_psockp, psock); - return -EALREADY; + err = -EALREADY; + goto out; } psock->save_data_ready = csk->sk_data_ready; @@ -1456,7 +1465,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock, /* Schedule RX work in case there are already bytes queued */ strp_check_rcv(&psock->strp); - return 0; +out: + release_sock(csk); + + return err; } static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) @@ -1508,6 +1520,7 @@ static void kcm_unattach(struct kcm_psock *psock) if (WARN_ON(psock->rx_kcm)) { write_unlock_bh(&csk->sk_callback_lock); + release_sock(csk); return; } @@ -2015,7 +2028,6 @@ static struct pernet_operations kcm_net_ops = { .exit = kcm_exit_net, .id = &kcm_net_id, .size = sizeof(struct kcm_net), - .async = true, }; static int __init kcm_init(void) diff --git a/net/key/af_key.c b/net/key/af_key.c index 3ac08ab26207..7e2e7188e7f4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3863,7 +3863,6 @@ static struct pernet_operations pfkey_net_ops = { .exit = pfkey_net_exit, .id = &pfkey_net_id, .size = sizeof(struct netns_pfkey), - .async = true, }; static void __exit ipsec_pfkey_exit(void) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 194a7483bb93..14b67dfacc4b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -111,6 +111,13 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; +#if IS_ENABLED(CONFIG_IPV6) +static bool l2tp_sk_is_v6(struct sock *sk) +{ + return sk->sk_family == PF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr); +} +#endif static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) { @@ -136,51 +143,6 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) } -/* Lookup the tunnel socket, possibly involving the fs code if the socket is - * owned by userspace. A struct sock returned from this function must be - * released using l2tp_tunnel_sock_put once you're done with it. - */ -static struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) -{ - int err = 0; - struct socket *sock = NULL; - struct sock *sk = NULL; - - if (!tunnel) - goto out; - - if (tunnel->fd >= 0) { - /* Socket is owned by userspace, who might be in the process - * of closing it. Look the socket up using the fd to ensure - * consistency. - */ - sock = sockfd_lookup(tunnel->fd, &err); - if (sock) - sk = sock->sk; - } else { - /* Socket is owned by kernelspace */ - sk = tunnel->sock; - sock_hold(sk); - } - -out: - return sk; -} - -/* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */ -static void l2tp_tunnel_sock_put(struct sock *sk) -{ - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); - if (tunnel) { - if (tunnel->fd >= 0) { - /* Socket is owned by userspace */ - sockfd_put(sk->sk_socket); - } - sock_put(sk); - } - sock_put(sk); -} - /* Session hash list. 
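
The kcm_attach() change above takes lock_sock(csk) for the whole attach operation and converts each early return into an err assignment plus goto out, so the release runs on every path. A compact illustration of that single-exit locking idiom, with a user-space pthread mutex standing in for lock_sock()/release_sock() and purely hypothetical names:

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t csk_lock = PTHREAD_MUTEX_INITIALIZER;

static int attach(int family_ok, int state_ok)
{
	int err = 0;

	pthread_mutex_lock(&csk_lock);		/* lock_sock(csk) */

	if (!family_ok) {
		err = -EOPNOTSUPP;		/* was: return -EOPNOTSUPP */
		goto out;
	}
	if (!state_ok) {
		err = -EOPNOTSUPP;
		goto out;
	}
	/* ... allocation/registration steps, each jumping to out on error ... */
out:
	pthread_mutex_unlock(&csk_lock);	/* release_sock(csk) */
	return err;
}

int main(void)
{
	return attach(1, 1) ? 1 : 0;
}
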
* The session_id SHOULD be random according to RFC2661, but several * L2TP implementations (Cisco and Microsoft) use incrementing @@ -193,6 +155,13 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; } +void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) +{ + sock_put(tunnel->sock); + /* the tunnel is freed in the socket destructor */ +} +EXPORT_SYMBOL(l2tp_tunnel_free); + /* Lookup a tunnel. A new reference is held on the returned tunnel. */ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) { @@ -345,13 +314,11 @@ int l2tp_session_register(struct l2tp_session *session, } l2tp_tunnel_inc_refcount(tunnel); - sock_hold(tunnel->sock); hlist_add_head_rcu(&session->global_hlist, g_head); spin_unlock_bh(&pn->l2tp_session_hlist_lock); } else { l2tp_tunnel_inc_refcount(tunnel); - sock_hold(tunnel->sock); } hlist_add_head(&session->hlist, head); @@ -969,7 +936,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - tunnel = l2tp_sock_to_tunnel(sk); + tunnel = l2tp_tunnel(sk); if (tunnel == NULL) goto pass_up; @@ -977,13 +944,10 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) tunnel->name, skb->len); if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook)) - goto pass_up_put; + goto pass_up; - sock_put(sk); return 0; -pass_up_put: - sock_put(sk); pass_up: return 1; } @@ -1092,7 +1056,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->ignore_df = 1; #if IS_ENABLED(CONFIG_IPV6) - if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) + if (l2tp_sk_is_v6(tunnel->sock)) error = inet6_csk_xmit(tunnel->sock, skb, NULL); else #endif @@ -1155,6 +1119,15 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len goto out_unlock; } + /* The user-space may change the connection status for the user-space + * provided socket at run time: we must check it under the socket lock + */ + if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) { + kfree_skb(skb); + ret = NET_XMIT_DROP; + goto out_unlock; + } + /* Get routing info from the tunnel socket */ skb_dst_drop(skb); skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); @@ -1174,7 +1147,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) + if (l2tp_sk_is_v6(sk)) udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr, &sk->sk_v6_daddr, udp_len); @@ -1207,14 +1180,12 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); static void l2tp_tunnel_destruct(struct sock *sk) { struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); - struct l2tp_net *pn; if (tunnel == NULL) goto end; l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name); - /* Disable udp encapsulation */ switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: @@ -1231,18 +1202,11 @@ static void l2tp_tunnel_destruct(struct sock *sk) sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; - /* Remove the tunnel struct from the tunnel list */ - pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_del_rcu(&tunnel->list); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - - tunnel->sock = NULL; - l2tp_tunnel_dec_refcount(tunnel); - /* Call the original destructor */ if (sk->sk_destruct) (*sk->sk_destruct)(sk); + + kfree_rcu(tunnel, 
rcu); end: return; } @@ -1303,49 +1267,43 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) { - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); - if (tunnel) { - l2tp_tunnel_closeall(tunnel); - sock_put(sk); - } + struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); + + if (tunnel) + l2tp_tunnel_delete(tunnel); } /* Workqueue tunnel deletion function */ static void l2tp_tunnel_del_work(struct work_struct *work) { - struct l2tp_tunnel *tunnel = NULL; - struct socket *sock = NULL; - struct sock *sk = NULL; - - tunnel = container_of(work, struct l2tp_tunnel, del_work); + struct l2tp_tunnel *tunnel = container_of(work, struct l2tp_tunnel, + del_work); + struct sock *sk = tunnel->sock; + struct socket *sock = sk->sk_socket; + struct l2tp_net *pn; l2tp_tunnel_closeall(tunnel); - sk = l2tp_tunnel_sock_lookup(tunnel); - if (!sk) - goto out; - - sock = sk->sk_socket; - - /* If the tunnel socket was created by userspace, then go through the - * inet layer to shut the socket down, and let userspace close it. - * Otherwise, if we created the socket directly within the kernel, use + /* If the tunnel socket was created within the kernel, use * the sk API to release it here. - * In either case the tunnel resources are freed in the socket - * destructor when the tunnel socket goes away. */ - if (tunnel->fd >= 0) { - if (sock) - inet_shutdown(sock, 2); - } else { + if (tunnel->fd < 0) { if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); } } - l2tp_tunnel_sock_put(sk); -out: + /* Remove the tunnel struct from the tunnel list */ + pn = l2tp_pernet(tunnel->l2tp_net); + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_rcu(&tunnel->list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + + /* drop initial ref */ + l2tp_tunnel_dec_refcount(tunnel); + + /* drop workqueue ref */ l2tp_tunnel_dec_refcount(tunnel); } @@ -1515,9 +1473,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 encap = cfg->encap; /* Quick sanity checks */ + err = -EPROTONOSUPPORT; + if (sk->sk_type != SOCK_DGRAM) { + pr_debug("tunl %hu: fd %d wrong socket type\n", + tunnel_id, fd); + goto err; + } switch (encap) { case L2TP_ENCAPTYPE_UDP: - err = -EPROTONOSUPPORT; if (sk->sk_protocol != IPPROTO_UDP) { pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP); @@ -1525,7 +1488,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 } break; case L2TP_ENCAPTYPE_IP: - err = -EPROTONOSUPPORT; if (sk->sk_protocol != IPPROTO_L2TP) { pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n", tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP); @@ -1565,24 +1527,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 if (cfg != NULL) tunnel->debug = cfg->debug; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - if (ipv6_addr_v4mapped(&np->saddr) && - ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { - struct inet_sock *inet = inet_sk(sk); - - tunnel->v4mapped = true; - inet->inet_saddr = np->saddr.s6_addr32[3]; - inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3]; - inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3]; - } else { - tunnel->v4mapped = false; - } - } -#endif - /* Mark socket as an encapsulation socket. 
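
Note the ordering in the l2tp_tunnel_create() hunk above: the initial ref_count and the sock_hold() now happen before sk_destruct is installed, so the destructor can never observe a half-initialized tunnel. A hedged user-space sketch of this initialize-refcount-before-publish pattern, with illustrative names rather than the l2tp API:

#include <stdatomic.h>
#include <stdlib.h>

struct tunnel {
	atomic_int ref_count;
	/* ... socket pointer, list membership, destructor hook ... */
};

static struct tunnel *tunnel_create(void)
{
	struct tunnel *t = calloc(1, sizeof(*t));

	if (!t)
		return NULL;
	/* the initial reference is taken *before* the object is
	 * published (destructor installed, list insertion), mirroring
	 * the refcount_set()/sock_hold() ordering in the patch */
	atomic_store(&t->ref_count, 1);
	return t;
}

static void tunnel_put(struct tunnel *t)
{
	/* the last reference frees the object; in l2tp this is also
	 * where the held socket reference is finally released */
	if (atomic_fetch_sub(&t->ref_count, 1) == 1)
		free(t);
}

int main(void)
{
	struct tunnel *t = tunnel_create();

	if (!t)
		return 1;
	tunnel_put(t);	/* drops the initial ref */
	return 0;
}
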
See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { @@ -1598,13 +1542,22 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 sk->sk_user_data = tunnel; } + /* Bump the reference count. The tunnel context is deleted + * only when this drops to zero. A reference is also held on + * the tunnel socket to ensure that it is not released while + * the tunnel is extant. Must be done before sk_destruct is + * set. + */ + refcount_set(&tunnel->ref_count, 1); + sock_hold(sk); + tunnel->sock = sk; + tunnel->fd = fd; + /* Hook on the tunnel socket destructor so that we can cleanup * if the tunnel socket goes away. */ tunnel->old_sk_destruct = sk->sk_destruct; sk->sk_destruct = &l2tp_tunnel_destruct; - tunnel->sock = sk; - tunnel->fd = fd; lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); sk->sk_allocation = GFP_ATOMIC; @@ -1614,11 +1567,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); - - /* Bump the reference count. The tunnel context is deleted - * only when this drops to zero. Must be done before list insertion - */ - refcount_set(&tunnel->ref_count, 1); spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); @@ -1659,8 +1607,6 @@ void l2tp_session_free(struct l2tp_session *session) if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - sock_put(tunnel->sock); - session->tunnel = NULL; l2tp_tunnel_dec_refcount(tunnel); } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9bbee90e9963..2718d0b284d0 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -188,9 +188,6 @@ struct l2tp_tunnel { struct sock *sock; /* Parent socket */ int fd; /* Parent fd, if tunnel socket * was created by userspace */ -#if IS_ENABLED(CONFIG_IPV6) - bool v4mapped; -#endif struct work_struct del_work; @@ -214,27 +211,8 @@ static inline void *l2tp_session_priv(struct l2tp_session *session) return &session->priv[0]; } -static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk) -{ - struct l2tp_tunnel *tunnel; - - if (sk == NULL) - return NULL; - - sock_hold(sk); - tunnel = (struct l2tp_tunnel *)(sk->sk_user_data); - if (tunnel == NULL) { - sock_put(sk); - goto out; - } - - BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - -out: - return tunnel; -} - struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); +void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, @@ -283,7 +261,7 @@ static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel) static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel) { if (refcount_dec_and_test(&tunnel->ref_count)) - kfree_rcu(tunnel, rcu); + l2tp_tunnel_free(tunnel); } /* Session reference counts. 
Incremented when code obtains a reference diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4614585e1720..a9c05b2bc1b0 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -234,17 +234,13 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { struct sk_buff *skb; - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + struct l2tp_tunnel *tunnel = sk->sk_user_data; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); - if (tunnel) { - l2tp_tunnel_closeall(tunnel); - sock_put(sk); - } - - sk_refcnt_debug_dec(sk); + if (tunnel) + l2tp_tunnel_delete(tunnel); } static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index efea58b66295..957369192ca1 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -248,16 +248,14 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + struct l2tp_tunnel *tunnel = sk->sk_user_data; lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - if (tunnel) { - l2tp_tunnel_closeall(tunnel); - sock_put(sk); - } + if (tunnel) + l2tp_tunnel_delete(tunnel); inet6_destroy_sock(sk); } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 0c4f49a6a0cb..d6deca11da19 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -416,20 +416,28 @@ abort: * Session (and tunnel control) socket create/destroy. *****************************************************************************/ +static void pppol2tp_put_sk(struct rcu_head *head) +{ + struct pppol2tp_session *ps; + + ps = container_of(head, typeof(*ps), rcu); + sock_put(ps->__sk); +} + /* Called by l2tp_core when a session socket is being closed. */ static void pppol2tp_session_close(struct l2tp_session *session) { - struct sock *sk; - - BUG_ON(session->magic != L2TP_SESSION_MAGIC); + struct pppol2tp_session *ps; - sk = pppol2tp_session_get_sock(session); - if (sk) { - if (sk->sk_socket) - inet_shutdown(sk->sk_socket, SEND_SHUTDOWN); - sock_put(sk); - } + ps = l2tp_session_priv(session); + mutex_lock(&ps->sk_lock); + ps->__sk = rcu_dereference_protected(ps->sk, + lockdep_is_held(&ps->sk_lock)); + RCU_INIT_POINTER(ps->sk, NULL); + if (ps->__sk) + call_rcu(&ps->rcu, pppol2tp_put_sk); + mutex_unlock(&ps->sk_lock); } /* Really kill the session socket. (Called from sock_put() if @@ -449,14 +457,6 @@ static void pppol2tp_session_destruct(struct sock *sk) } } -static void pppol2tp_put_sk(struct rcu_head *head) -{ - struct pppol2tp_session *ps; - - ps = container_of(head, typeof(*ps), rcu); - sock_put(ps->__sk); -} - /* Called when the PPPoX socket (session) is closed. */ static int pppol2tp_release(struct socket *sock) @@ -480,26 +480,17 @@ static int pppol2tp_release(struct socket *sock) sock_orphan(sk); sock->sk = NULL; + /* If the socket is associated with a session, + * l2tp_session_delete will call pppol2tp_session_close which + * will drop the session's ref on the socket. 
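
The pppol2tp_session_close() rework above clears ps->sk under ps->sk_lock and defers the final sock_put() through call_rcu(), so lockless readers that already fetched the pointer remain safe. A rough user-space approximation of that retire pattern, using a mutex and an immediate deferred put in place of a real RCU grace period (all names hypothetical):

#include <pthread.h>
#include <stdlib.h>

struct sock_obj {
	int refs;
};

struct sess {
	pthread_mutex_t lock;
	struct sock_obj *sk;	/* readers may fetch this without the lock */
};

static void sock_obj_put(struct sock_obj *s)
{
	if (--s->refs == 0)
		free(s);
}

static void sess_close(struct sess *ps)
{
	struct sock_obj *old;

	/* clear the published pointer under the lock ... */
	pthread_mutex_lock(&ps->lock);
	old = ps->sk;
	ps->sk = NULL;
	pthread_mutex_unlock(&ps->lock);

	/* ... and only then drop the reference; the kernel defers this
	 * via call_rcu() so in-flight lockless readers finish first */
	if (old)
		sock_obj_put(old);
}

int main(void)
{
	struct sess ps = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct sock_obj *s = calloc(1, sizeof(*s));

	if (!s)
		return 1;
	s->refs = 1;
	ps.sk = s;
	sess_close(&ps);	/* pointer cleared, last ref dropped */
	return 0;
}
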
+ */ session = pppol2tp_sock_to_session(sk); - - if (session != NULL) { - struct pppol2tp_session *ps; - + if (session) { l2tp_session_delete(session); - - ps = l2tp_session_priv(session); - mutex_lock(&ps->sk_lock); - ps->__sk = rcu_dereference_protected(ps->sk, - lockdep_is_held(&ps->sk_lock)); - RCU_INIT_POINTER(ps->sk, NULL); - mutex_unlock(&ps->sk_lock); - call_rcu(&ps->rcu, pppol2tp_put_sk); - - /* Rely on the sock_put() call at the end of the function for - * dropping the reference held by pppol2tp_sock_to_session(). - * The last reference will be dropped by pppol2tp_put_sk(). - */ + /* drop the ref obtained by pppol2tp_sock_to_session */ + sock_put(sk); } + release_sock(sk); /* This will delete the session context via @@ -796,6 +787,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, out_no_ppp: /* This is how we get the session context from the socket. */ + sock_hold(sk); sk->sk_user_data = session; rcu_assign_pointer(ps->sk, sk); mutex_unlock(&ps->sk_lock); @@ -1750,7 +1742,7 @@ static __net_init int pppol2tp_init_net(struct net *net) struct proc_dir_entry *pde; int err = 0; - pde = proc_create("pppol2tp", S_IRUGO, net->proc_net, + pde = proc_create("pppol2tp", 0444, net->proc_net, &pppol2tp_proc_fops); if (!pde) { err = -ENOMEM; @@ -1770,7 +1762,6 @@ static struct pernet_operations pppol2tp_net_ops = { .init = pppol2tp_init_net, .exit = pppol2tp_exit_net, .id = &pppol2tp_net_id, - .async = true, }; /***************************************************************************** diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 66821e8a2b7a..62ea0aed94b4 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -249,11 +249,11 @@ int __init llc_proc_init(void) if (!llc_proc_dir) goto out; - p = proc_create("socket", S_IRUGO, llc_proc_dir, &llc_seq_socket_fops); + p = proc_create("socket", 0444, llc_proc_dir, &llc_seq_socket_fops); if (!p) goto out_socket; - p = proc_create("core", S_IRUGO, llc_proc_dir, &llc_seq_core_fops); + p = proc_create("core", 0444, llc_proc_dir, &llc_seq_core_fops); if (!p) goto out_core; diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d90928f50226..a7f7b8ff4729 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -394,8 +394,9 @@ static void llc_sap_mcast(struct llc_sap *sap, const struct llc_addr *laddr, struct sk_buff *skb) { - int i = 0, count = 256 / sizeof(struct sock *); - struct sock *sk, *stack[count]; + int i = 0; + struct sock *sk; + struct sock *stack[256 / sizeof(struct sock *)]; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); @@ -408,7 +409,7 @@ static void llc_sap_mcast(struct llc_sap *sap, continue; sock_hold(sk); - if (i < count) + if (i < ARRAY_SIZE(stack)) stack[i++] = sk; else { llc_do_mcast(sap, skb, stack, i); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fd68f6fb02d7..85dbaa891059 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4,6 +4,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This file is GPLv2 as found in COPYING. 
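
One detail from the llc_sap_mcast() hunk above: the variable-length array "stack[count]" becomes a fixed-size array bounded with ARRAY_SIZE(), removing a VLA from the kernel stack. A small sketch of the same batching bound; ARRAY_SIZE is shown with its usual definition, and this is an illustration rather than the llc code itself:

#include <stddef.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	/* fixed bound known at compile time, unlike the removed VLA */
	void *stack[256 / sizeof(void *)];
	size_t i, n = 0;

	for (i = 0; i < 100; i++) {
		if (n < ARRAY_SIZE(stack))	/* same bound test as the patch */
			stack[n++] = NULL;
		/* else: flush the batch and start refilling, as the
		 * llc_do_mcast() call does in the hunk above */
	}
	printf("batched %zu of a possible %zu entries\n", n, ARRAY_SIZE(stack));
	return 0;
}
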
*/ @@ -925,6 +926,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, */ sdata->control_port_protocol = params->crypto.control_port_ethertype; sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt; + sdata->control_port_over_nl80211 = + params->crypto.control_port_over_nl80211; sdata->encrypt_headroom = ieee80211_cs_headroom(sdata->local, &params->crypto, sdata->vif.type); @@ -934,6 +937,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, params->crypto.control_port_ethertype; vlan->control_port_no_encrypt = params->crypto.control_port_no_encrypt; + vlan->control_port_over_nl80211 = + params->crypto.control_port_over_nl80211; vlan->encrypt_headroom = ieee80211_cs_headroom(sdata->local, &params->crypto, @@ -2019,6 +2024,8 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, if (err) return err; + sdata->control_port_over_nl80211 = setup->control_port_over_nl80211; + /* can mesh use other SMPS modes? */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = sdata->local->rx_chains; @@ -2156,6 +2163,8 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, */ p.uapsd = false; + ieee80211_regulatory_limit_wmm_params(sdata, &p, params->ac); + sdata->tx_conf[params->ac] = p; if (drv_conf_tx(local, sdata, params->ac, &p)) { wiphy_debug(local->hw.wiphy, @@ -2313,6 +2322,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(int) * NUM_NL80211_BANDS); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MCAST_RATE); + return 0; } @@ -3786,4 +3797,5 @@ const struct cfg80211_ops mac80211_config_ops = { .add_nan_func = ieee80211_add_nan_func, .del_nan_func = ieee80211_del_nan_func, .set_multicast_to_unicast = ieee80211_set_multicast_to_unicast, + .tx_control_port = ieee80211_tx_control_port, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index a75653affbf7..b5adf3625d16 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -213,6 +213,7 @@ static const char *hw_flag_names[] = { FLAG(SUPPORTS_TX_FRAG), FLAG(SUPPORTS_TDLS_BUFFER_STA), FLAG(DEAUTH_NEED_MGD_TX_PREP), + FLAG(DOESNT_SUPPORT_QOS_NDP), #undef FLAG }; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index d7523530d3f8..c78036a0ac94 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -466,6 +466,21 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_PEER_REQUEST); } +enum nl80211_smps_mode +ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps) +{ + switch (smps) { + case IEEE80211_SMPS_OFF: + return NL80211_SMPS_OFF; + case IEEE80211_SMPS_STATIC: + return NL80211_SMPS_STATIC; + case IEEE80211_SMPS_DYNAMIC: + return NL80211_SMPS_DYNAMIC; + default: + return NL80211_SMPS_OFF; + } +} + int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index db07e0de9a03..6449a1c2283b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1839,11 +1839,12 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED | IEEE80211_HT_PARAM_RIFS_MODE; - changed |= BSS_CHANGED_HT; + changed |= BSS_CHANGED_HT | BSS_CHANGED_MCAST_RATE; ieee80211_bss_info_change_notify(sdata, changed); sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = local->rx_chains; +
sdata->control_port_over_nl80211 = params->control_port_over_nl80211; ieee80211_queue_work(&local->hw, &sdata->work); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ae9c33cd8ada..6372dbdadf53 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -4,6 +4,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -899,6 +900,7 @@ struct ieee80211_sub_if_data { u16 sequence_number; __be16 control_port_protocol; bool control_port_no_encrypt; + bool control_port_over_nl80211; int encrypt_headroom; atomic_t num_tx_queued; @@ -1734,6 +1736,9 @@ void ieee80211_check_fast_xmit(struct sta_info *sta); void ieee80211_check_fast_xmit_all(struct ieee80211_local *local); void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); void ieee80211_clear_fast_xmit(struct sta_info *sta); +int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, + const u8 *buf, size_t len, + const u8 *dest, __be16 proto, bool unencrypted); /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, @@ -1788,6 +1793,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs); +enum nl80211_smps_mode +ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps); /* VHT */ void @@ -1814,6 +1821,8 @@ void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_vht_cap *vht_cap); void ieee80211_get_vht_mask_from_cap(__le16 vht_cap, u16 vht_mask[NL80211_VHT_NSS_MAX]); +enum nl80211_chan_width +ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, @@ -1865,6 +1874,9 @@ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */ int ieee80211_frame_duration(enum nl80211_band band, size_t len, int rate, int erp, int short_preamble, int shift); +void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, + struct ieee80211_tx_queue_params *qparam, + int ac); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, bool bss_notify, bool enable_qos); void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d13ba064951f..555e389b7dfa 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -519,6 +519,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) master->control_port_protocol; sdata->control_port_no_encrypt = master->control_port_no_encrypt; + sdata->control_port_over_nl80211 = + master->control_port_over_nl80211; sdata->vif.cab_queue = master->vif.cab_queue; memcpy(sdata->vif.hw_queue, master->vif.hw_queue, sizeof(sdata->vif.hw_queue)); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index aee05ec3f7ea..ee0d0cc8dc3b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -126,7 +126,7 @@ static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data 
*sdata = key->sdata; struct sta_info *sta; int ret = -EOPNOTSUPP; @@ -162,7 +162,6 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (sta && !sta->uploaded) goto out_unsupported; - sdata = key->sdata; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { /* * The driver doesn't know anything about VLAN interfaces. @@ -214,8 +213,11 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) /* all of these we can do in software - if driver can */ if (ret == 1) return 0; - if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) + if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) { + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + return 0; return -EINVAL; + } return 0; default: return -EINVAL; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0785d04a80bc..9ea17afaa237 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -554,6 +554,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_FEATURE_USERSPACE_MPM | NL80211_FEATURE_FULL_AP_CLIENT_STATE; wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA); + wiphy_ext_feature_set(wiphy, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211); if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | @@ -930,8 +932,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) IEEE80211_HT_CAP_SM_PS_SHIFT; } - /* if low-level driver supports AP, we also support VLAN */ - if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) { + /* if low-level driver supports AP, we also support VLAN. + * drivers advertising SW_CRYPTO_CONTROL should enable AP_VLAN + * based on their support to transmit SW encrypted packets. + */ + if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP) && + !ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL)) { hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_AP_VLAN); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6a381cbe1e33..d51da26e9c18 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -880,7 +880,8 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | - BSS_CHANGED_BEACON_INT; + BSS_CHANGED_BEACON_INT | + BSS_CHANGED_MCAST_RATE; local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0024eff9bb84..69449db7e283 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -897,7 +897,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true); + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, + !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP)); if (!skb) return; @@ -1786,12 +1787,14 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params[ac].acm = acm; params[ac].uapsd = uapsd; - if (params[ac].cw_min > params[ac].cw_max) { + if (params->cw_min == 0 || + params[ac].cw_min > params[ac].cw_max) { sdata_info(sdata, "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", params[ac].cw_min, params[ac].cw_max, aci); return false; } + ieee80211_regulatory_limit_wmm_params(sdata, ¶ms[ac], ac); } for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { @@ -2010,8 +2013,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* 
deauthenticate/disassociate now */ if (tx || frame_buf) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - /* * In multi channel scenarios guarantee that the virtual * interface is granted immediate airtime to transmit the @@ -3306,82 +3307,14 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_HT_OPERATION) | (1ULL << WLAN_EID_EXT_CHANSWITCH_ANN); -static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - struct ieee80211_rx_status *rx_status) +static void ieee80211_handle_beacon_sig(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_managed *ifmgd, + struct ieee80211_bss_conf *bss_conf, + struct ieee80211_local *local, + struct ieee80211_rx_status *rx_status) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; - size_t baselen; - struct ieee802_11_elems elems; - struct ieee80211_local *local = sdata->local; - struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_channel *chan; - struct sta_info *sta; - u32 changed = 0; - bool erp_valid; - u8 erp_value = 0; - u32 ncrc; - u8 *bssid; - u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; - - sdata_assert_lock(sdata); - - /* Process beacon from the current BSS */ - baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; - if (baselen > len) - return; - - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) { - rcu_read_unlock(); - return; - } - - if (rx_status->freq != chanctx_conf->def.chan->center_freq) { - rcu_read_unlock(); - return; - } - chan = chanctx_conf->def.chan; - rcu_read_unlock(); - - if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && - ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) { - ieee802_11_parse_elems(mgmt->u.beacon.variable, - len - baselen, false, &elems); - - ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); - if (elems.tim && !elems.parse_error) { - const struct ieee80211_tim_ie *tim_ie = elems.tim; - ifmgd->dtim_period = tim_ie->dtim_period; - } - ifmgd->have_beacon = true; - ifmgd->assoc_data->need_beacon = false; - if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { - sdata->vif.bss_conf.sync_tsf = - le64_to_cpu(mgmt->u.beacon.timestamp); - sdata->vif.bss_conf.sync_device_ts = - rx_status->device_timestamp; - if (elems.tim) - sdata->vif.bss_conf.sync_dtim_count = - elems.tim->dtim_count; - else - sdata->vif.bss_conf.sync_dtim_count = 0; - } - /* continue assoc process */ - ifmgd->assoc_data->timeout = jiffies; - ifmgd->assoc_data->timeout_started = true; - run_again(sdata, ifmgd->assoc_data->timeout); - return; - } - - if (!ifmgd->associated || - !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return; - bssid = ifmgd->associated->bssid; - /* Track average RSSI from the Beacon frames of the current AP */ + if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) { ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE; ewma_beacon_signal_init(&ifmgd->ave_beacon_signal); @@ -3468,6 +3401,86 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, sig, GFP_KERNEL); } } +} + +static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; + size_t baselen; + struct ieee802_11_elems elems; + struct ieee80211_local *local = sdata->local; + struct 
ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_channel *chan; + struct sta_info *sta; + u32 changed = 0; + bool erp_valid; + u8 erp_value = 0; + u32 ncrc; + u8 *bssid; + u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + sdata_assert_lock(sdata); + + /* Process beacon from the current BSS */ + baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; + if (baselen > len) + return; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!chanctx_conf) { + rcu_read_unlock(); + return; + } + + if (rx_status->freq != chanctx_conf->def.chan->center_freq) { + rcu_read_unlock(); + return; + } + chan = chanctx_conf->def.chan; + rcu_read_unlock(); + + if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && + ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) { + ieee802_11_parse_elems(mgmt->u.beacon.variable, + len - baselen, false, &elems); + + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); + if (elems.tim && !elems.parse_error) { + const struct ieee80211_tim_ie *tim_ie = elems.tim; + ifmgd->dtim_period = tim_ie->dtim_period; + } + ifmgd->have_beacon = true; + ifmgd->assoc_data->need_beacon = false; + if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { + sdata->vif.bss_conf.sync_tsf = + le64_to_cpu(mgmt->u.beacon.timestamp); + sdata->vif.bss_conf.sync_device_ts = + rx_status->device_timestamp; + if (elems.tim) + sdata->vif.bss_conf.sync_dtim_count = + elems.tim->dtim_count; + else + sdata->vif.bss_conf.sync_dtim_count = 0; + } + /* continue assoc process */ + ifmgd->assoc_data->timeout = jiffies; + ifmgd->assoc_data->timeout_started = true; + run_again(sdata, ifmgd->assoc_data->timeout); + return; + } + + if (!ifmgd->associated || + !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) + return; + bssid = ifmgd->associated->bssid; + + if (!(rx_status->flag & RX_FLAG_NO_SIGNAL_VAL)) + ieee80211_handle_beacon_sig(sdata, ifmgd, bss_conf, + local, rx_status); if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) { mlme_dbg_ratelimited(sdata, @@ -4844,6 +4857,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, sdata->control_port_protocol = req->crypto.control_port_ethertype; sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt; + sdata->control_port_over_nl80211 = + req->crypto.control_port_over_nl80211; sdata->encrypt_headroom = ieee80211_cs_headroom(local, &req->crypto, sdata->vif.type); diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 9766c1cc4b0a..8221bc5582ab 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -690,7 +690,7 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) #ifdef CONFIG_MAC80211_DEBUGFS mp->fixed_rate_idx = (u32) -1; mp->dbg_fixed_rate = debugfs_create_u32("fixed_rate_idx", - S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); + 0666, debugfsdir, &mp->fixed_rate_idx); #endif minstrel_init_cck_rates(mp); diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index 36fc971deb86..9ad7d63d3e5b 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -214,11 +214,11 @@ minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) { struct minstrel_sta_info *mi = priv_sta; - mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi, - &minstrel_stat_fops); + mi->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, mi, + &minstrel_stat_fops); - mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 
S_IRUGO, dir, - mi, &minstrel_stat_csv_fops); + mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, mi, + &minstrel_stat_csv_fops); } void diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index 7d969e300fb3..bfcc03152dc6 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -303,10 +303,10 @@ minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) { struct minstrel_ht_sta_priv *msp = priv_sta; - msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp, - &minstrel_ht_stat_fops); - msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO, - dir, msp, &minstrel_ht_stat_csv_fops); + msp->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, msp, + &minstrel_ht_stat_fops); + msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, msp, + &minstrel_ht_stat_csv_fops); } void diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index de7d10732fd5..03102aff0953 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2245,6 +2245,32 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc) return true; } +static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, + struct ieee80211_rx_data *rx) +{ + struct ieee80211_sub_if_data *sdata = rx->sdata; + struct net_device *dev = sdata->dev; + + if (unlikely((skb->protocol == sdata->control_port_protocol || + skb->protocol == cpu_to_be16(ETH_P_PREAUTH)) && + sdata->control_port_over_nl80211)) { + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + bool noencrypt = status->flag & RX_FLAG_DECRYPTED; + struct ethhdr *ehdr = eth_hdr(skb); + + cfg80211_rx_control_port(dev, skb->data, skb->len, + ehdr->h_source, + be16_to_cpu(skb->protocol), noencrypt); + dev_kfree_skb(skb); + } else { + /* deliver to local stack */ + if (rx->napi) + napi_gro_receive(rx->napi, skb); + else + netif_receive_skb(skb); + } +} + /* * requires that rx->skb is a frame with ethernet header */ @@ -2329,13 +2355,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) #endif if (skb) { - /* deliver to local stack */ skb->protocol = eth_type_trans(skb, dev); memset(skb->cb, 0, sizeof(skb->cb)); - if (rx->napi) - napi_gro_receive(rx->napi, skb); - else - netif_receive_skb(skb); + + ieee80211_deliver_skb_to_local_stack(skb, rx); } if (xmit_skb) { @@ -2549,11 +2572,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) fwd_skb = skb_copy_expand(skb, local->tx_headroom + sdata->encrypt_headroom, 0, GFP_ATOMIC); - if (!fwd_skb) { - net_info_ratelimited("%s: failed to clone mesh frame\n", - sdata->name); + if (!fwd_skb) goto out; - } fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY); @@ -2807,7 +2827,8 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) !(rx->flags & IEEE80211_RX_BEACON_REPORTED)) { int sig = 0; - if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM)) + if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) && + !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) sig = status->signal; cfg80211_report_obss_beacon(rx->local->hw.wiphy, @@ -2885,7 +2906,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (rx->sta->sta.smps_mode == smps_mode) goto handled; rx->sta->sta.smps_mode = smps_mode; - sta_opmode.smps_mode = smps_mode; + sta_opmode.smps_mode = + ieee80211_smps_mode_to_smps_mode(smps_mode); sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED; sband = rx->local->hw.wiphy->bands[status->band]; @@ 
-2923,7 +2945,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) rx->sta->sta.bandwidth = new_bw; sband = rx->local->hw.wiphy->bands[status->band]; - sta_opmode.bw = new_bw; + sta_opmode.bw = + ieee80211_sta_rx_bw_to_chan_width(rx->sta); sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED; rate_control_rate_update(local, sband, rx->sta, @@ -3148,7 +3171,8 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) * it transmitted were processed or returned. */ - if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM)) + if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) && + !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) sig = status->signal; if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, @@ -3965,7 +3989,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) != fast_rx->expected_ds_bits) - goto drop; + return false; /* assign the key to drop unencrypted frames (later) * and strip the IV/MIC if necessary diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index ef2becaade50..a3b1bcc2b461 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -73,7 +73,9 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bool signal_valid; struct ieee80211_sub_if_data *scan_sdata; - if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) + if (rx_status->flag & RX_FLAG_NO_SIGNAL_VAL) + bss_meta.signal = 0; /* invalid signal indication */ + else if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) bss_meta.signal = rx_status->signal * 100; else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7643178ef132..535de3161a78 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3569,6 +3569,14 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, if (!IS_ERR_OR_NULL(sta)) { struct ieee80211_fast_tx *fast_tx; + /* We need a bit of data queued to build aggregates properly, so + * instruct the TCP stack to allow more than a single ms of data + * to be queued in the stack. The value is a bit-shift of 1 + * second, so 8 is ~4ms of queued data. Only affects local TCP + * sockets. 
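+	 * For reference, the arithmetic: a shift of 8 corresponds to
+	 * 2^-8 s, i.e. 1 s >> 8 is roughly 3.9 ms, which is where the
+	 * ~4ms figure above comes from. sk_pacing_shift_update() is a
+	 * no-op for NULL or non-full sockets, so it is safe to call
+	 * unconditionally on this path.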
+ */ + sk_pacing_shift_update(skb->sk, 8); + fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && @@ -4749,3 +4757,49 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, ieee80211_xmit(sdata, NULL, skb); local_bh_enable(); } + +int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, + const u8 *buf, size_t len, + const u8 *dest, __be16 proto, bool unencrypted) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ethhdr *ehdr; + u32 flags; + + /* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE + * or Pre-Authentication + */ + if (proto != sdata->control_port_protocol && + proto != cpu_to_be16(ETH_P_PREAUTH)) + return -EINVAL; + + if (unencrypted) + flags = IEEE80211_TX_INTFL_DONT_ENCRYPT; + else + flags = 0; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(struct ethhdr) + len); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, local->hw.extra_tx_headroom + sizeof(struct ethhdr)); + + skb_put_data(skb, buf, len); + + ehdr = skb_push(skb, sizeof(struct ethhdr)); + memcpy(ehdr->h_dest, dest, ETH_ALEN); + memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN); + ehdr->h_proto = proto; + + skb->dev = dev; + skb->protocol = htons(ETH_P_802_3); + skb_reset_network_header(skb); + skb_reset_mac_header(skb); + + __ieee80211_subif_start_xmit(skb, skb->dev, flags); + + return 0; +} diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1f82191ce601..11f9cfc016d9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -5,6 +5,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1113,6 +1114,48 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, return crc; } +void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, + struct ieee80211_tx_queue_params + *qparam, int ac) +{ + struct ieee80211_chanctx_conf *chanctx_conf; + const struct ieee80211_reg_rule *rrule; + struct ieee80211_wmm_ac *wmm_ac; + u16 center_freq = 0; + + if (sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_STATION) + return; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (chanctx_conf) + center_freq = chanctx_conf->def.chan->center_freq; + + if (!center_freq) { + rcu_read_unlock(); + return; + } + + rrule = freq_reg_info(sdata->wdev.wiphy, MHZ_TO_KHZ(center_freq)); + + if (IS_ERR_OR_NULL(rrule) || !rrule->wmm_rule) { + rcu_read_unlock(); + return; + } + + if (sdata->vif.type == NL80211_IFTYPE_AP) + wmm_ac = &rrule->wmm_rule->ap[ac]; + else + wmm_ac = &rrule->wmm_rule->client[ac]; + qparam->cw_min = max_t(u16, qparam->cw_min, wmm_ac->cw_min); + qparam->cw_max = max_t(u16, qparam->cw_max, wmm_ac->cw_max); + qparam->aifs = max_t(u8, qparam->aifs, wmm_ac->aifsn); + qparam->txop = !qparam->txop ? 
wmm_ac->cot / 32 : + min_t(u16, qparam->txop, wmm_ac->cot / 32); + rcu_read_unlock(); +} + void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, bool bss_notify, bool enable_qos) { @@ -1206,6 +1249,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, break; } } + ieee80211_regulatory_limit_wmm_params(sdata, &qparam, ac); qparam.uapsd = false; @@ -1968,7 +2012,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_CQM | BSS_CHANGED_QOS | BSS_CHANGED_IDLE | - BSS_CHANGED_TXPOWER; + BSS_CHANGED_TXPOWER | + BSS_CHANGED_MCAST_RATE; if (sdata->vif.mu_mimo_owner) changed |= BSS_CHANGED_MU_GROUPS; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 5714dee76b12..259325cbcc31 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -358,6 +358,36 @@ enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta) return NL80211_CHAN_WIDTH_80; } +enum nl80211_chan_width +ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta) +{ + enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.bandwidth; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + u32 cap_width; + + switch (cur_bw) { + case IEEE80211_STA_RX_BW_20: + if (!sta->sta.ht_cap.ht_supported) + return NL80211_CHAN_WIDTH_20_NOHT; + else + return NL80211_CHAN_WIDTH_20; + case IEEE80211_STA_RX_BW_40: + return NL80211_CHAN_WIDTH_40; + case IEEE80211_STA_RX_BW_80: + return NL80211_CHAN_WIDTH_80; + case IEEE80211_STA_RX_BW_160: + cap_width = + vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + + if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) + return NL80211_CHAN_WIDTH_160; + + return NL80211_CHAN_WIDTH_80P80; + default: + return NL80211_CHAN_WIDTH_20; + } +} + enum ieee80211_sta_rx_bandwidth ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) { @@ -484,7 +514,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, new_bw = ieee80211_sta_cur_vht_bw(sta); if (new_bw != sta->sta.bandwidth) { sta->sta.bandwidth = new_bw; - sta_opmode.bw = new_bw; + sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(sta); changed |= IEEE80211_RC_BW_CHANGED; sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index e545a3c9365f..7a4de6d618b1 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -122,7 +122,7 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) if (skb->len <= mtu) return false; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index d4201665a580..8d7e849d4825 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -183,13 +183,17 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info) hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO); if (!hdr) { - kfree(skb); + kfree_skb(skb); return -EMSGSIZE; } package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]); attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + if (!attr) { + kfree_skb(skb); + return -EMSGSIZE; + } rc = ncsi_write_package_info(skb, ndp, package_id); if (rc) { @@ -204,7 +208,7 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info) err: genlmsg_cancel(skb, hdr); - kfree(skb); + kfree_skb(skb); return rc; } @@ -299,6 +303,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info) package = np; if (!package) { /* The 
user has set a package that does not exist */ + spin_unlock_irqrestore(&ndp->lock, flags); return -ERANGE; } @@ -317,6 +322,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info) /* The user has set a channel that does not exist on this * package */ + spin_unlock_irqrestore(&ndp->lock, flags); netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n", channel_id); return -ERANGE; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index d72cc786c7b7..0f6b8172fb9a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -629,7 +629,6 @@ static void __net_exit netfilter_net_exit(struct net *net) static struct pernet_operations netfilter_net_ops = { .init = netfilter_net_init, .exit = netfilter_net_exit, - .async = true, }; int __init netfilter_init(void) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 2523ebe2b3cc..bc4bd247bb7d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -2095,7 +2095,6 @@ static struct pernet_operations ip_set_net_ops = { .exit = ip_set_net_exit, .id = &ip_set_net_id, .size = sizeof(struct ip_set_net), - .async = true, }; static int __init diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 3e17d32b629d..58d5d05aec24 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -260,7 +260,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); ct = nf_ct_get(skb, &ctinfo); - if (ct && (ct->status & IPS_NAT_MASK)) { + if (ct) { bool mangled; /* If mangling fails this function will return 0 diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 942e835caf7f..3057e453bf31 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -604,7 +604,6 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { } static struct pernet_operations ip_vs_lblc_ops = { .init = __ip_vs_lblc_init, .exit = __ip_vs_lblc_exit, - .async = true, }; static int __init ip_vs_lblc_init(void) diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index a5acab25c36b..92adc04557ed 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -789,7 +789,6 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { } static struct pernet_operations ip_vs_lblcr_ops = { .init = __ip_vs_lblcr_init, .exit = __ip_vs_lblcr_exit, - .async = true, }; static int __init ip_vs_lblcr_init(void) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 705198de671d..41ff04ee2554 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1763,14 +1763,14 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) { struct net *net; - rtnl_lock(); + down_read(&net_rwsem); for_each_net(net) { if (atomic_read(&net->ct.count) == 0) continue; __nf_ct_unconfirmed_destroy(net); nf_queue_nf_hook_drop(net); } - rtnl_unlock(); + up_read(&net_rwsem); /* Need to wait for netns cleanup worker to finish, if its * running -- it might have deleted a net namespace from diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index a4a59dc7cf17..bac5848f1c8e 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -33,7 +33,7 @@ MODULE_ALIAS("ip_conntrack_netbios_ns"); MODULE_ALIAS_NFCT_HELPER("netbios_ns"); static unsigned int 
timeout __read_mostly = 3; -module_param(timeout, uint, S_IRUSR); +module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); static struct nf_conntrack_expect_policy exp_policy = { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b00e84bf4107..4c1d0c5bc268 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3497,7 +3497,6 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) static struct pernet_operations ctnetlink_net_ops = { .init = ctnetlink_net_init, .exit_batch = ctnetlink_net_exit_batch, - .async = true, }; static int __init ctnetlink_init(void) diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9bcd72fe91f9..d049ea5a3770 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -406,7 +406,6 @@ static struct pernet_operations proto_gre_net_ops = { .exit = proto_gre_net_exit, .id = &proto_gre_net_id, .size = sizeof(struct netns_proto_gre), - .async = true, }; static int __init nf_ct_proto_gre_init(void) diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c index 2d0f8e010821..b8e0a22ca1a9 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -26,7 +26,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_NFCT_HELPER("snmp"); static unsigned int timeout __read_mostly = 30; -module_param(timeout, uint, S_IRUSR); +module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); int (*nf_nat_snmp_hook)(struct sk_buff *skb, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 3cdce391362e..037fec54c850 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -495,7 +495,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net) if (uid_valid(root_uid) && gid_valid(root_gid)) proc_set_user(pde, root_uid, root_gid); - pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat, + pde = proc_create("nf_conntrack", 0444, net->proc_net_stat, &ct_cpu_seq_fops); if (!pde) goto out_stat_nf_conntrack; @@ -705,7 +705,6 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) static struct pernet_operations nf_conntrack_net_ops = { .init = nf_conntrack_pernet_init, .exit_batch = nf_conntrack_pernet_exit, - .async = true, }; static int __init nf_conntrack_standalone_init(void) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 1ba3da51050d..6d0357817cda 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -549,7 +549,7 @@ static int __net_init nf_log_net_init(struct net *net) int ret = -ENOMEM; #ifdef CONFIG_PROC_FS - if (!proc_create("nf_log", S_IRUGO, + if (!proc_create("nf_log", 0444, net->nf.proc_netfilter, &nflog_file_ops)) return ret; #endif @@ -577,7 +577,6 @@ static void __net_exit nf_log_net_exit(struct net *net) static struct pernet_operations nf_log_net_ops = { .init = nf_log_net_init, .exit = nf_log_net_exit, - .async = true, }; int __init netfilter_log_init(void) diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c index 254c2c6bde48..350eb147754d 100644 --- a/net/netfilter/nf_log_netdev.c +++ b/net/netfilter/nf_log_netdev.c @@ -47,7 +47,6 @@ static void __net_exit nf_log_netdev_net_exit(struct net *net) static struct pernet_operations nf_log_netdev_net_ops 
= { .init = nf_log_netdev_net_init, .exit = nf_log_netdev_net_exit, - .async = true, }; static int __init nf_log_netdev_init(void) diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 64b875e452ca..6039b350abbe 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -325,7 +325,7 @@ static const struct file_operations synproxy_cpu_seq_fops = { static int __net_init synproxy_proc_init(struct net *net) { - if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat, + if (!proc_create("synproxy", 0444, net->proc_net_stat, &synproxy_cpu_seq_fops)) return -ENOMEM; return 0; @@ -398,7 +398,6 @@ static struct pernet_operations synproxy_net_ops = { .exit = synproxy_net_exit, .id = &synproxy_net_id, .size = sizeof(struct synproxy_net), - .async = true, }; static int __init synproxy_core_init(void) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2bd80fa9b070..6e93782bbe4f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5041,9 +5041,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nf_flowtable_type *type; + struct nft_flowtable *flowtable, *ft; u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct nft_flowtable *flowtable; struct nft_table *table; struct nft_ctx ctx; int err, i, k; @@ -5103,6 +5103,22 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, goto err3; for (i = 0; i < flowtable->ops_len; i++) { + if (!flowtable->ops[i].dev) + continue; + + list_for_each_entry(ft, &table->flowtables, list) { + for (k = 0; k < ft->ops_len; k++) { + if (!ft->ops[k].dev) + continue; + + if (flowtable->ops[i].dev == ft->ops[k].dev && + flowtable->ops[i].pf == ft->ops[k].pf) { + err = -EBUSY; + goto err4; + } + } + } + err = nf_register_net_hook(net, &flowtable->ops[i]); if (err < 0) goto err4; @@ -5124,7 +5140,7 @@ err5: i = flowtable->ops_len; err4: for (k = i - 1; k >= 0; k--) - nf_unregister_net_hook(net, &flowtable->ops[i]); + nf_unregister_net_hook(net, &flowtable->ops[k]); kfree(flowtable->ops); err3: @@ -5149,6 +5165,11 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_ctx ctx; + if (!nla[NFTA_FLOWTABLE_TABLE] || + (!nla[NFTA_FLOWTABLE_NAME] && + !nla[NFTA_FLOWTABLE_HANDLE])) + return -EINVAL; + table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, genmask); if (IS_ERR(table)) @@ -5406,6 +5427,7 @@ err: static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { cancel_delayed_work_sync(&flowtable->data.gc_work); + kfree(flowtable->ops); kfree(flowtable->name); flowtable->data.type->free(&flowtable->data); rhashtable_destroy(&flowtable->data.rhashtable); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3f1624ee056f..d40591fe1b2f 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -674,7 +674,7 @@ static const struct nft_set_ops * nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, u32 flags) { - if (desc->size) { + if (desc->size && !(flags & NFT_SET_TIMEOUT)) { switch (desc->klen) { case 4: return &nft_hash_fast_ops; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 75cd5196b29b..71325fef647d 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -424,6 +424,36 @@ textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t 
nfproto) return buf; } +/** + * xt_check_proc_name - check that name is suitable for /proc file creation + * + * @name: file name candidate + * @size: length of buffer + * + * some x_tables modules wish to create a file in /proc. + * This function makes sure that the name is suitable for this + * purpose, it checks that name is NUL terminated and isn't a 'special' + * name, like "..". + * + * returns negative number on error or 0 if name is useable. + */ +int xt_check_proc_name(const char *name, unsigned int size) +{ + if (name[0] == '\0') + return -EINVAL; + + if (strnlen(name, size) == size) + return -ENAMETOOLONG; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0 || + strchr(name, '/')) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(xt_check_proc_name); + int xt_check_match(struct xt_mtchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { @@ -1916,7 +1946,6 @@ static void __net_exit xt_net_exit(struct net *net) static struct pernet_operations xt_net_ops = { .init = xt_net_init, .exit = xt_net_exit, - .async = true, }; static int __init xt_init(void) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 1ac6600bfafd..5ee859193783 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -132,7 +132,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) ret = -ENOMEM; goto out_free_timer; } - info->timer->attr.attr.mode = S_IRUGO; + info->timer->attr.attr.mode = 0444; info->timer->attr.show = idletimer_tg_show; ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 86b0580b2216..475957cfcf50 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -20,7 +20,7 @@ #include <linux/netfilter/xt_TEE.h> struct xt_tee_priv { - struct notifier_block notifier; + struct list_head list; struct xt_tee_tginfo *tginfo; int oif; }; @@ -51,29 +51,35 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) } #endif +static DEFINE_MUTEX(priv_list_mutex); +static LIST_HEAD(priv_list); + static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct xt_tee_priv *priv; - priv = container_of(this, struct xt_tee_priv, notifier); - switch (event) { - case NETDEV_REGISTER: - if (!strcmp(dev->name, priv->tginfo->oif)) - priv->oif = dev->ifindex; - break; - case NETDEV_UNREGISTER: - if (dev->ifindex == priv->oif) - priv->oif = -1; - break; - case NETDEV_CHANGENAME: - if (!strcmp(dev->name, priv->tginfo->oif)) - priv->oif = dev->ifindex; - else if (dev->ifindex == priv->oif) - priv->oif = -1; - break; + mutex_lock(&priv_list_mutex); + list_for_each_entry(priv, &priv_list, list) { + switch (event) { + case NETDEV_REGISTER: + if (!strcmp(dev->name, priv->tginfo->oif)) + priv->oif = dev->ifindex; + break; + case NETDEV_UNREGISTER: + if (dev->ifindex == priv->oif) + priv->oif = -1; + break; + case NETDEV_CHANGENAME: + if (!strcmp(dev->name, priv->tginfo->oif)) + priv->oif = dev->ifindex; + else if (dev->ifindex == priv->oif) + priv->oif = -1; + break; + } } + mutex_unlock(&priv_list_mutex); return NOTIFY_DONE; } @@ -89,8 +95,6 @@ static int tee_tg_check(const struct xt_tgchk_param *par) return -EINVAL; if (info->oif[0]) { - int ret; - if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; @@ -100,14 +104,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par) priv->tginfo = info; priv->oif = -1; - priv->notifier.notifier_call = 
tee_netdev_event; info->priv = priv; - ret = register_netdevice_notifier(&priv->notifier); - if (ret) { - kfree(priv); - return ret; - } + mutex_lock(&priv_list_mutex); + list_add(&priv->list, &priv_list); + mutex_unlock(&priv_list_mutex); } else info->priv = NULL; @@ -120,7 +121,9 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par) struct xt_tee_tginfo *info = par->targinfo; if (info->priv) { - unregister_netdevice_notifier(&info->priv->notifier); + mutex_lock(&priv_list_mutex); + list_del(&info->priv->list); + mutex_unlock(&priv_list_mutex); kfree(info->priv); } static_key_slow_dec(&xt_tee_enabled); @@ -153,13 +156,29 @@ static struct xt_target tee_tg_reg[] __read_mostly = { #endif }; +static struct notifier_block tee_netdev_notifier = { + .notifier_call = tee_netdev_event, +}; + static int __init tee_tg_init(void) { - return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); + int ret; + + ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); + if (ret) + return ret; + ret = register_netdevice_notifier(&tee_netdev_notifier); + if (ret) { + xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); + return ret; + } + + return 0; } static void __exit tee_tg_exit(void) { + unregister_netdevice_notifier(&tee_netdev_notifier); xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 64fc3721d74c..0cd73567e7ff 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -916,8 +916,9 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) struct hashlimit_cfg3 cfg = {}; int ret; - if (info->name[sizeof(info->name) - 1] != '\0') - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); @@ -934,8 +935,9 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) struct hashlimit_cfg3 cfg = {}; int ret; - if (info->name[sizeof(info->name) - 1] != '\0') - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 2); @@ -949,9 +951,11 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) static int hashlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_hashlimit_mtinfo3 *info = par->matchinfo; + int ret; - if (info->name[sizeof(info->name) - 1] != '\0') - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg, info->name, 3); @@ -1344,7 +1348,6 @@ static struct pernet_operations hashlimit_net_ops = { .exit = hashlimit_net_exit, .id = &hashlimit_net_id, .size = sizeof(struct hashlimit_net), - .async = true, }; static int __init hashlimit_mt_init(void) diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 19efdb757944..9bbfc17ce3ec 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -51,8 +51,8 @@ static unsigned int ip_list_gid __read_mostly; module_param(ip_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); -module_param(ip_list_uid, uint, S_IRUGO | S_IWUSR); -module_param(ip_list_gid, uint, S_IRUGO | S_IWUSR); +module_param(ip_list_uid, uint, 0644); +module_param(ip_list_gid, uint, 0644); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); 
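The mode conversions in this hunk and throughout the series (the minstrel debugfs files, the nf_conntrack/nf_log proc entries, the module params above) swap S_I* macros for their octal values; a small userspace cross-check of the equivalences, assuming only the standard POSIX <sys/stat.h> bits, which the kernel's macros share:

#include <assert.h>
#include <sys/stat.h>

/* Compile-time check (C11): the octal literals used in this series
 * equal the macro combinations they replace. */
static_assert((S_IRUSR | S_IRGRP | S_IROTH) == 0444, "S_IRUGO");
static_assert((S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR) == 0644, "S_IRUGO|S_IWUSR");
static_assert((0444 | S_IWUSR | S_IWGRP | S_IWOTH) == 0666, "S_IRUGO|S_IWUGO");
static_assert(S_IRUSR == 0400, "S_IRUSR");
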
MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); @@ -361,9 +361,9 @@ static int recent_mt_check(const struct xt_mtchk_param *par, info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } - if (info->name[0] == '\0' || - strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) - return -EINVAL; + ret = xt_check_proc_name(info->name, sizeof(info->name)); + if (ret) + return ret; if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot) nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1; @@ -687,7 +687,6 @@ static struct pernet_operations recent_net_ops = { .exit = recent_net_exit, .id = &recent_net_id, .size = sizeof(struct recent_net), - .async = true, }; static struct xt_match recent_mt_reg[] __read_mostly = { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5d10dcfe6411..f1b02d87e336 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -253,7 +253,6 @@ static struct pernet_operations netlink_tap_net_ops = { .exit = netlink_tap_exit_net, .id = &netlink_tap_net_id, .size = sizeof(struct netlink_tap_net), - .async = true, }; static bool netlink_filter_tap(const struct sk_buff *skb) @@ -2726,7 +2725,6 @@ static void __init netlink_add_usersock_entry(void) static struct pernet_operations __net_initdata netlink_net_ops = { .init = netlink_net_init, .exit = netlink_net_exit, - .async = true, }; static inline u32 netlink_hash(const void *data, u32 len, u32 seed) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a6f63a5faee7..b9ce82c9440f 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1035,7 +1035,6 @@ static void __net_exit genl_pernet_exit(struct net *net) static struct pernet_operations genl_pernet_ops = { .init = genl_pernet_init, .exit = genl_pernet_exit, - .async = true, }; static int __init genl_init(void) @@ -1107,7 +1106,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, if (!err) delivered = true; else if (err != -ESRCH) - goto error; + return err; return delivered ? 0 : -ESRCH; error: kfree_skb(skb); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 35bb6807927f..4221d98a314b 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1450,9 +1450,9 @@ static int __init nr_proto_init(void) nr_loopback_init(); - proc_create("nr", S_IRUGO, init_net.proc_net, &nr_info_fops); - proc_create("nr_neigh", S_IRUGO, init_net.proc_net, &nr_neigh_fops); - proc_create("nr_nodes", S_IRUGO, init_net.proc_net, &nr_nodes_fops); + proc_create("nr", 0444, init_net.proc_net, &nr_info_fops); + proc_create("nr_neigh", 0444, init_net.proc_net, &nr_neigh_fops); + proc_create("nr_nodes", 0444, init_net.proc_net, &nr_nodes_fops); out: return rc; fail: diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ef38e5aecd28..015e24e08909 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2363,10 +2363,10 @@ static void __net_exit ovs_exit_net(struct net *dnet) list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); - rtnl_lock(); + down_read(&net_rwsem); for_each_net(net) list_vports_from_net(net, dnet, &head); - rtnl_unlock(); + up_read(&net_rwsem); /* Detach all vports from given namespace. 
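 * The for_each_net() walk above now runs under the read side of
 * net_rwsem rather than the RTNL, matching the nf_conntrack
 * conversion earlier in this series.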
*/ list_for_each_entry_safe(vport, vport_next, &head, detach_list) { diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 04b94281a30b..b891a91577f8 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -242,14 +242,20 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); + if (band->rate == 0) { + err = -EINVAL; + goto exit_free_meter; + } + band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); /* Figure out max delta_t that is enough to fill any bucket. * Keep max_delta_t size to the bucket units: * pkts => 1/1000 packets, kilobits => bits. + * + * Start with a full bucket. */ - band_max_delta_t = (band->burst_size + band->rate) * 1000; - /* Start with a full bucket. */ - band->bucket = band_max_delta_t; + band->bucket = (band->burst_size + band->rate) * 1000; + band_max_delta_t = band->bucket / band->rate; if (band_max_delta_t > meter->max_delta_t) meter->max_delta_t = band_max_delta_t; band++; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index b6c8524032a0..f81c1d0ddff4 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -464,10 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, return 0; } -static unsigned int packet_length(const struct sk_buff *skb, - struct net_device *dev) +static int packet_length(const struct sk_buff *skb, + struct net_device *dev) { - unsigned int length = skb->len - dev->hard_header_len; + int length = skb->len - dev->hard_header_len; if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) @@ -478,7 +478,7 @@ static unsigned int packet_length(const struct sk_buff *skb, * account for 802.1ad. e.g. is_skb_forwardable(). */ - return length; + return length > 0 ? 
length : 0; } void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2c5a6fe5d749..616cb9c18f88 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4557,7 +4557,6 @@ static void __net_exit packet_net_exit(struct net *net) static struct pernet_operations packet_net_ops = { .init = packet_net_init, .exit = packet_net_exit, - .async = true, }; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 9454e8393793..77787512fc32 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -342,7 +342,6 @@ static struct pernet_operations phonet_net_ops = { .exit = phonet_exit_net, .id = &phonet_net_id, .size = sizeof(struct phonet_net), - .async = true, }; /* Initialize Phonet devices list */ diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c index 50615d5efac1..9cf089b9754e 100644 --- a/net/qrtr/smd.c +++ b/net/qrtr/smd.c @@ -114,5 +114,6 @@ static struct rpmsg_driver qcom_smd_qrtr_driver = { module_rpmsg_driver(qcom_smd_qrtr_driver); +MODULE_ALIAS("rpmsg:IPCRTR"); MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); MODULE_LICENSE("GPL v2"); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index f7126108a811..ab751a150f70 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -77,7 +77,7 @@ static int rds_release(struct socket *sock) rds_send_drop_to(rs, NULL); rds_rdma_drop_keys(rs); rds_notify_queue_get(rs, NULL); - __skb_queue_purge(&rs->rs_zcookie_queue); + rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue); spin_lock_bh(&rds_sock_lock); list_del_init(&rs->rs_item); @@ -180,7 +180,7 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, } if (!list_empty(&rs->rs_recv_queue) || !list_empty(&rs->rs_notify_queue) || - !skb_queue_empty(&rs->rs_zcookie_queue)) + !list_empty(&rs->rs_zcookie_queue.zcookie_head)) mask |= (EPOLLIN | EPOLLRDNORM); if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) mask |= (EPOLLOUT | EPOLLWRNORM); @@ -515,7 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) INIT_LIST_HEAD(&rs->rs_recv_queue); INIT_LIST_HEAD(&rs->rs_notify_queue); INIT_LIST_HEAD(&rs->rs_cong_list); - skb_queue_head_init(&rs->rs_zcookie_queue); + rds_message_zcopy_queue_init(&rs->rs_zcookie_queue); spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; rs->rs_rx_traces = 0; diff --git a/net/rds/connection.c b/net/rds/connection.c index 2da3176bf792..abef75da89a7 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -540,9 +540,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), + u64 *buffer, size_t item_len) { - uint64_t buffer[(item_len + 7) / 8]; struct hlist_head *head; struct rds_connection *conn; size_t i; @@ -578,9 +578,9 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_conn_path *, void *), + u64 *buffer, size_t item_len) { - u64 buffer[(item_len + 7) / 8]; struct hlist_head *head; struct rds_connection *conn; size_t i; @@ -649,8 +649,11 @@ static void rds_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { + u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8]; + rds_walk_conn_path_info(sock, len, iter, lens, rds_conn_info_visitor, + buffer, sizeof(struct rds_info_connection)); } diff --git a/net/rds/ib.c 
b/net/rds/ib.c index 50a88f3e7e39..02deee29e7f1 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -321,8 +321,11 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { + u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8]; + rds_for_each_conn_info(sock, len, iter, lens, rds_ib_conn_info_visitor, + buffer, sizeof(struct rds_info_rdma_connection)); } diff --git a/net/rds/message.c b/net/rds/message.c index 116cf87ccb89..a35f76971984 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -48,7 +48,6 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { [RDS_EXTHDR_GEN_NUM] = sizeof(u32), }; - void rds_message_addref(struct rds_message *rm) { rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount)); @@ -56,9 +55,9 @@ void rds_message_addref(struct rds_message *rm) } EXPORT_SYMBOL_GPL(rds_message_addref); -static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie) +static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie) { - struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb; + struct rds_zcopy_cookies *ck = &info->zcookies; int ncookies = ck->num; if (ncookies == RDS_MAX_ZCOOKIES) @@ -68,38 +67,61 @@ static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie) return true; } +static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif) +{ + return container_of(znotif, struct rds_msg_zcopy_info, znotif); +} + +void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q) +{ + unsigned long flags; + LIST_HEAD(copy); + struct rds_msg_zcopy_info *info, *tmp; + + spin_lock_irqsave(&q->lock, flags); + list_splice(&q->zcookie_head, ©); + INIT_LIST_HEAD(&q->zcookie_head); + spin_unlock_irqrestore(&q->lock, flags); + + list_for_each_entry_safe(info, tmp, ©, rs_zcookie_next) { + list_del(&info->rs_zcookie_next); + kfree(info); + } +} + static void rds_rm_zerocopy_callback(struct rds_sock *rs, struct rds_znotifier *znotif) { - struct sk_buff *skb, *tail; - unsigned long flags; - struct sk_buff_head *q; + struct rds_msg_zcopy_info *info; + struct rds_msg_zcopy_queue *q; u32 cookie = znotif->z_cookie; struct rds_zcopy_cookies *ck; + struct list_head *head; + unsigned long flags; + mm_unaccount_pinned_pages(&znotif->z_mmp); q = &rs->rs_zcookie_queue; spin_lock_irqsave(&q->lock, flags); - tail = skb_peek_tail(q); - - if (tail && skb_zcookie_add(tail, cookie)) { - spin_unlock_irqrestore(&q->lock, flags); - mm_unaccount_pinned_pages(&znotif->z_mmp); - consume_skb(rds_skb_from_znotifier(znotif)); - /* caller invokes rds_wake_sk_sleep() */ - return; + head = &q->zcookie_head; + if (!list_empty(head)) { + info = list_entry(head, struct rds_msg_zcopy_info, + rs_zcookie_next); + if (info && rds_zcookie_add(info, cookie)) { + spin_unlock_irqrestore(&q->lock, flags); + kfree(rds_info_from_znotifier(znotif)); + /* caller invokes rds_wake_sk_sleep() */ + return; + } } - skb = rds_skb_from_znotifier(znotif); - ck = (struct rds_zcopy_cookies *)skb->cb; + info = rds_info_from_znotifier(znotif); + ck = &info->zcookies; memset(ck, 0, sizeof(*ck)); - WARN_ON(!skb_zcookie_add(skb, cookie)); - - __skb_queue_tail(q, skb); + WARN_ON(!rds_zcookie_add(info, cookie)); + list_add_tail(&q->zcookie_head, &info->rs_zcookie_next); spin_unlock_irqrestore(&q->lock, flags); /* caller invokes rds_wake_sk_sleep() */ - - mm_unaccount_pinned_pages(&znotif->z_mmp); } /* @@ -333,14 +355,13 @@ struct rds_message *rds_message_map_pages(unsigned long 
*page_addrs, unsigned in return rm; } -int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, - bool zcopy) +static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from) { - unsigned long to_copy, nbytes; - unsigned long sg_off; struct scatterlist *sg; int ret = 0; int length = iov_iter_count(from); + int total_copied = 0; + struct rds_msg_zcopy_info *info; rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); @@ -348,56 +369,66 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, * now allocate and copy in the data payload. */ sg = rm->data.op_sg; - sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ - if (zcopy) { - int total_copied = 0; - struct sk_buff *skb; - - skb = alloc_skb(0, GFP_KERNEL); - if (!skb) - return -ENOMEM; - BUILD_BUG_ON(sizeof(skb->cb) < - max_t(int, sizeof(struct rds_znotifier), - sizeof(struct rds_zcopy_cookies))); - rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb); - if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp, - length)) { - ret = -ENOMEM; + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + INIT_LIST_HEAD(&info->rs_zcookie_next); + rm->data.op_mmp_znotifier = &info->znotif; + if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp, + length)) { + ret = -ENOMEM; + goto err; + } + while (iov_iter_count(from)) { + struct page *pages; + size_t start; + ssize_t copied; + + copied = iov_iter_get_pages(from, &pages, PAGE_SIZE, + 1, &start); + if (copied < 0) { + struct mmpin *mmp; + int i; + + for (i = 0; i < rm->data.op_nents; i++) + put_page(sg_page(&rm->data.op_sg[i])); + mmp = &rm->data.op_mmp_znotifier->z_mmp; + mm_unaccount_pinned_pages(mmp); + ret = -EFAULT; goto err; } - while (iov_iter_count(from)) { - struct page *pages; - size_t start; - ssize_t copied; - - copied = iov_iter_get_pages(from, &pages, PAGE_SIZE, - 1, &start); - if (copied < 0) { - struct mmpin *mmp; - int i; - - for (i = 0; i < rm->data.op_nents; i++) - put_page(sg_page(&rm->data.op_sg[i])); - mmp = &rm->data.op_mmp_znotifier->z_mmp; - mm_unaccount_pinned_pages(mmp); - ret = -EFAULT; - goto err; - } - total_copied += copied; - iov_iter_advance(from, copied); - length -= copied; - sg_set_page(sg, pages, copied, start); - rm->data.op_nents++; - sg++; - } - WARN_ON_ONCE(length != 0); - return ret; + total_copied += copied; + iov_iter_advance(from, copied); + length -= copied; + sg_set_page(sg, pages, copied, start); + rm->data.op_nents++; + sg++; + } + WARN_ON_ONCE(length != 0); + return ret; err: - consume_skb(skb); - rm->data.op_mmp_znotifier = NULL; - return ret; - } /* zcopy */ + kfree(info); + rm->data.op_mmp_znotifier = NULL; + return ret; +} + +int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, + bool zcopy) +{ + unsigned long to_copy, nbytes; + unsigned long sg_off; + struct scatterlist *sg; + int ret = 0; + + rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); + + /* now allocate and copy in the data payload. */ + sg = rm->data.op_sg; + sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. 
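+	 * The copy loop below relies on that: it tests !sg_page(sg) to
+	 * decide when a fresh page must be allocated for the payload.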
*/ + + if (zcopy) + return rds_message_zcopy_from_user(rm, from); while (iov_iter_count(from)) { if (!sg_page(sg)) { diff --git a/net/rds/rds.h b/net/rds/rds.h index 33b16353d8f3..b04c333d9d1c 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -357,16 +357,27 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) #define RDS_MSG_FLUSH 8 struct rds_znotifier { - struct list_head z_list; struct mmpin z_mmp; u32 z_cookie; }; -#define RDS_ZCOPY_SKB(__skb) ((struct rds_znotifier *)&((__skb)->cb[0])) +struct rds_msg_zcopy_info { + struct list_head rs_zcookie_next; + union { + struct rds_znotifier znotif; + struct rds_zcopy_cookies zcookies; + }; +}; -static inline struct sk_buff *rds_skb_from_znotifier(struct rds_znotifier *z) +struct rds_msg_zcopy_queue { + struct list_head zcookie_head; + spinlock_t lock; /* protects zcookie_head queue */ +}; + +static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q) { - return container_of((void *)z, struct sk_buff, cb); + spin_lock_init(&q->lock); + INIT_LIST_HEAD(&q->zcookie_head); } struct rds_message { @@ -603,8 +614,7 @@ struct rds_sock { /* Socket receive path trace points*/ u8 rs_rx_traces; u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; - - struct sk_buff_head rs_zcookie_queue; + struct rds_msg_zcopy_queue rs_zcookie_queue; }; static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) @@ -725,6 +735,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), + u64 *buffer, size_t item_len); __printf(2, 3) @@ -803,6 +814,7 @@ void rds_message_addref(struct rds_message *rm); void rds_message_put(struct rds_message *rm); void rds_message_wait(struct rds_message *rm); void rds_message_unmapped(struct rds_message *rm); +void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info); static inline void rds_message_make_checksum(struct rds_header *hdr) { diff --git a/net/rds/recv.c b/net/rds/recv.c index d50747725221..de50e2126e40 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -579,9 +579,10 @@ out: static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg) { - struct sk_buff *skb; - struct sk_buff_head *q = &rs->rs_zcookie_queue; + struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue; + struct rds_msg_zcopy_info *info = NULL; struct rds_zcopy_cookies *done; + unsigned long flags; if (!msg->msg_control) return false; @@ -590,16 +591,24 @@ static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg) msg->msg_controllen < CMSG_SPACE(sizeof(*done))) return false; - skb = skb_dequeue(q); - if (!skb) + spin_lock_irqsave(&q->lock, flags); + if (!list_empty(&q->zcookie_head)) { + info = list_entry(q->zcookie_head.next, + struct rds_msg_zcopy_info, rs_zcookie_next); + list_del(&info->rs_zcookie_next); + } + spin_unlock_irqrestore(&q->lock, flags); + if (!info) return false; - done = (struct rds_zcopy_cookies *)skb->cb; + done = &info->zcookies; if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done), done)) { - skb_queue_head(q, skb); + spin_lock_irqsave(&q->lock, flags); + list_add(&info->rs_zcookie_next, &q->zcookie_head); + spin_unlock_irqrestore(&q->lock, flags); return false; } - consume_skb(skb); + kfree(info); return true; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 08230a145042..351a28474667 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -272,13 +272,14 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) static void 
rds_tcp_conn_free(void *arg) { struct rds_tcp_connection *tc = arg; + unsigned long flags; rdsdebug("freeing tc %p\n", tc); - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irqsave(&rds_tcp_conn_lock, flags); if (!tc->t_tcp_node_detached) list_del(&tc->t_tcp_node); - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); kmem_cache_free(rds_tcp_conn_slab, tc); } @@ -308,13 +309,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) rdsdebug("rds_conn_path [%d] tc %p\n", i, conn->c_path[i].cp_transport_data); } - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); for (i = 0; i < RDS_MPATH_WORKERS; i++) { tc = conn->c_path[i].cp_transport_data; tc->t_tcp_node_detached = false; list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); } - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irq(&rds_tcp_conn_lock); fail: if (ret) { for (j = 0; j < i; j++) @@ -484,39 +485,6 @@ fail: return err; } -static void __net_exit rds_tcp_exit_net(struct net *net) -{ - struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); - - if (rtn->rds_tcp_sysctl) - unregister_net_sysctl_table(rtn->rds_tcp_sysctl); - - if (net != &init_net && rtn->ctl_table) - kfree(rtn->ctl_table); - - /* If rds_tcp_exit_net() is called as a result of netns deletion, - * the rds_tcp_kill_sock() device notifier would already have cleaned - * up the listen socket, thus there is no work to do in this function. - * - * If rds_tcp_exit_net() is called as a result of module unload, - * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then - * we do need to clean up the listen socket here. - */ - if (rtn->rds_tcp_listen_sock) { - struct socket *lsock = rtn->rds_tcp_listen_sock; - - rtn->rds_tcp_listen_sock = NULL; - rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); - } -} - -static struct pernet_operations rds_tcp_net_ops = { - .init = rds_tcp_init_net, - .exit = rds_tcp_exit_net, - .id = &rds_tcp_netid, - .size = sizeof(struct rds_tcp_net), -}; - static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; @@ -526,7 +494,7 @@ static void rds_tcp_kill_sock(struct net *net) rtn->rds_tcp_listen_sock = NULL; rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -539,45 +507,42 @@ static void rds_tcp_kill_sock(struct net *net) tc->t_tcp_node_detached = true; } } - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) rds_conn_destroy(tc->t_cpath->cp_conn); } -void *rds_tcp_listen_sock_def_readable(struct net *net) +static void __net_exit rds_tcp_exit_net(struct net *net) { struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); - struct socket *lsock = rtn->rds_tcp_listen_sock; - if (!lsock) - return NULL; + rds_tcp_kill_sock(net); - return lsock->sk->sk_user_data; + if (rtn->rds_tcp_sysctl) + unregister_net_sysctl_table(rtn->rds_tcp_sysctl); + + if (net != &init_net && rtn->ctl_table) + kfree(rtn->ctl_table); } -static int rds_tcp_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) +static struct pernet_operations rds_tcp_net_ops = { + .init = rds_tcp_init_net, + .exit = rds_tcp_exit_net, + .id = &rds_tcp_netid, + .size = sizeof(struct rds_tcp_net), +}; + +void *rds_tcp_listen_sock_def_readable(struct net *net) { - struct net_device *dev = 
netdev_notifier_info_to_dev(ptr); + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; - /* rds-tcp registers as a pernet subys, so the ->exit will only - * get invoked after network acitivity has quiesced. We need to - * clean up all sockets to quiesce network activity, and use - * the unregistration of the per-net loopback device as a trigger - * to start that cleanup. - */ - if (event == NETDEV_UNREGISTER_FINAL && - dev->ifindex == LOOPBACK_IFINDEX) - rds_tcp_kill_sock(dev_net(dev)); + if (!lsock) + return NULL; - return NOTIFY_DONE; + return lsock->sk->sk_user_data; } -static struct notifier_block rds_tcp_dev_notifier = { - .notifier_call = rds_tcp_dev_event, - .priority = -10, /* must be called after other network notifiers */ -}; - /* when sysctl is used to modify some kernel socket parameters,this * function resets the RDS connections in that netns so that we can * restart with new parameters. The assumption is that such reset @@ -587,7 +552,7 @@ static void rds_tcp_sysctl_reset(struct net *net) { struct rds_tcp_connection *tc, *_tc; - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -597,7 +562,7 @@ static void rds_tcp_sysctl_reset(struct net *net) /* reconnect with new parameters */ rds_conn_path_drop(tc->t_cpath, false); } - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irq(&rds_tcp_conn_lock); } static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, @@ -623,9 +588,7 @@ static void rds_tcp_exit(void) rds_tcp_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); - unregister_pernet_subsys(&rds_tcp_net_ops); - if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) - pr_warn("could not unregister rds_tcp_dev_notifier\n"); + unregister_pernet_device(&rds_tcp_net_ops); rds_tcp_destroy_conns(); rds_trans_unregister(&rds_tcp_transport); rds_tcp_recv_exit(); @@ -649,24 +612,15 @@ static int rds_tcp_init(void) if (ret) goto out_slab; - ret = register_pernet_subsys(&rds_tcp_net_ops); + ret = register_pernet_device(&rds_tcp_net_ops); if (ret) goto out_recv; - ret = register_netdevice_notifier(&rds_tcp_dev_notifier); - if (ret) { - pr_warn("could not register rds_tcp_dev_notifier\n"); - goto out_pernet; - } - rds_trans_register(&rds_tcp_transport); rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); goto out; - -out_pernet: - unregister_pernet_subsys(&rds_tcp_net_ops); out_recv: rds_tcp_recv_exit(); out_slab: diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index c061d6eb465d..22571189f21e 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2018 Oracle. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -142,12 +142,20 @@ int rds_tcp_accept_one(struct socket *sock) if (ret) goto out; - new_sock->type = sock->type; - new_sock->ops = sock->ops; ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true); if (ret < 0) goto out; + /* sock_create_lite() does not get a hold on the owner module so we + * need to do it here. Note that sock_release() uses sock->ops to + * determine if it needs to decrement the reference count. So set + * sock->ops after calling accept() in case that fails. 
And there's + * no need to do try_module_get() as the listener should have a hold + * already. + */ + new_sock->ops = sock->ops; + __module_get(new_sock->ops->owner); + ret = rds_tcp_keepalive(new_sock); if (ret < 0) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 5170373b797c..9ff5e0a76593 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1567,12 +1567,12 @@ static int __init rose_proto_init(void) rose_add_loopback_neigh(); - proc_create("rose", S_IRUGO, init_net.proc_net, &rose_info_fops); - proc_create("rose_neigh", S_IRUGO, init_net.proc_net, + proc_create("rose", 0444, init_net.proc_net, &rose_info_fops); + proc_create("rose_neigh", 0444, init_net.proc_net, &rose_neigh_fops); - proc_create("rose_nodes", S_IRUGO, init_net.proc_net, + proc_create("rose_nodes", 0444, init_net.proc_net, &rose_nodes_fops); - proc_create("rose_routes", S_IRUGO, init_net.proc_net, + proc_create("rose_routes", 0444, init_net.proc_net, &rose_routes_fops); out: return rc; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0c9c18aa7c77..ec5ec68be1aa 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -32,7 +32,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_RXRPC); unsigned int rxrpc_debug; // = RXRPC_DEBUG_KPROTO; -module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO); +module_param_named(debug, rxrpc_debug, uint, 0644); MODULE_PARM_DESC(debug, "RxRPC debugging mask"); static struct proto rxrpc_proto; @@ -40,6 +40,7 @@ static const struct proto_ops rxrpc_rpc_ops; /* current debugging ID */ atomic_t rxrpc_debug_id; +EXPORT_SYMBOL(rxrpc_debug_id); /* count of skbs currently in use */ atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; @@ -267,6 +268,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue * @upgrade: Request service upgrade for call + * @debug_id: The debug ID for tracing to be assigned to the call * * Allow a kernel service to begin a call on the nominated socket. This just * sets up all the internal tracking structures and allocates connection and @@ -282,7 +284,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, s64 tx_total_len, gfp_t gfp, rxrpc_notify_rx_t notify_rx, - bool upgrade) + bool upgrade, + unsigned int debug_id) { struct rxrpc_conn_parameters cp; struct rxrpc_call_params p; @@ -314,7 +317,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.upgrade = upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp); + call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id); /* The socket has been unlocked. 
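	 * rxrpc_new_client_call() drops the socket lock on both its
	 * success and error paths, so it must not be released again here.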
*/ if (!IS_ERR(call)) { call->notify_rx = notify_rx; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 416688381eb7..21cf164b6d85 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -691,7 +691,6 @@ struct rxrpc_send_params { * af_rxrpc.c */ extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; -extern atomic_t rxrpc_debug_id; extern struct workqueue_struct *rxrpc_workqueue; /* @@ -732,11 +731,12 @@ extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); -struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t); +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - struct rxrpc_call_params *, gfp_t); + struct rxrpc_call_params *, gfp_t, + unsigned int); int rxrpc_retry_client_call(struct rxrpc_sock *, struct rxrpc_call *, struct rxrpc_conn_parameters *, @@ -778,6 +778,7 @@ static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, call->error = error; call->completion = compl, call->state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); wake_up(&call->waitq); return true; } @@ -822,7 +823,7 @@ static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, rxrpc_seq_t seq, u32 abort_code, int error) { - trace_rxrpc_abort(why, call->cid, call->call_id, seq, + trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, abort_code, error); return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3028298ca561..92ebd1d7e0bb 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -34,7 +34,8 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, struct rxrpc_backlog *b, rxrpc_notify_rx_t notify_rx, rxrpc_user_attach_call_t user_attach_call, - unsigned long user_call_ID, gfp_t gfp) + unsigned long user_call_ID, gfp_t gfp, + unsigned int debug_id) { const void *here = __builtin_return_address(0); struct rxrpc_call *call; @@ -94,7 +95,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, /* Now it gets complicated, because calls get registered with the * socket here, particularly if a user ID is preassigned by the user. 
*/ - call = rxrpc_alloc_call(rx, gfp); + call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); @@ -174,7 +175,8 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) if (rx->discard_new_call) return 0; - while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0) + while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp, + atomic_inc_return(&rxrpc_debug_id)) == 0) ; return 0; @@ -347,7 +349,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, service_id == rx->second_service)) goto found_service; - trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; skb->priority = RX_INVALID_OPERATION; @@ -358,7 +360,7 @@ found_service: spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { - trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber, + trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; skb->priority = RX_INVALID_OPERATION; @@ -635,6 +637,7 @@ out_discard: * @user_attach_call: Func to attach call to user_call_ID * @user_call_ID: The tag to attach to the preallocated call * @gfp: The allocation conditions. + * @debug_id: The tracing debug ID. * * Charge up the socket with preallocated calls, each with a user ID. A * function should be provided to effect the attachment from the user's side. @@ -645,7 +648,8 @@ out_discard: int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx, rxrpc_user_attach_call_t user_attach_call, - unsigned long user_call_ID, gfp_t gfp) + unsigned long user_call_ID, gfp_t gfp, + unsigned int debug_id) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); struct rxrpc_backlog *b = rx->backlog; @@ -655,6 +659,6 @@ int rxrpc_kernel_charge_accept(struct socket *sock, return rxrpc_service_prealloc_one(rx, b, notify_rx, user_attach_call, user_call_ID, - gfp); + gfp, debug_id); } EXPORT_SYMBOL(rxrpc_kernel_charge_accept); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index ad2ab1103189..6a62e42e1d8d 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -195,6 +195,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) * the packets in the Tx buffer we're going to resend and what the new * resend timeout will be. 
*/ + trace_rxrpc_resend(call, (cursor + 1) & RXRPC_RXTX_BUFF_MASK); oldest = now; for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0b2db38dd32d..147657dfe757 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -99,7 +99,8 @@ found_extant_call: /* * allocate a new call */ -struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, + unsigned int debug_id) { struct rxrpc_call *call; @@ -138,7 +139,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) spin_lock_init(&call->notify_lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); - call->debug_id = atomic_inc_return(&rxrpc_debug_id); + call->debug_id = debug_id; call->tx_total_len = -1; call->next_rx_timo = 20 * HZ; call->next_req_timo = 1 * HZ; @@ -166,14 +167,15 @@ nomem: */ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, struct sockaddr_rxrpc *srx, - gfp_t gfp) + gfp_t gfp, + unsigned int debug_id) { struct rxrpc_call *call; ktime_t now; _enter(""); - call = rxrpc_alloc_call(rx, gfp); + call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; @@ -214,7 +216,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, struct rxrpc_call_params *p, - gfp_t gfp) + gfp_t gfp, + unsigned int debug_id) __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call, *xcall; @@ -225,7 +228,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, p->user_call_ID); - call = rxrpc_alloc_client_call(rx, srx, gfp); + call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); if (IS_ERR(call)) { release_sock(&rx->sk); _leave(" = %ld", PTR_ERR(call)); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b1dfae107431..d2ec3fd593e8 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -160,7 +160,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, lockdep_is_held(&conn->channel_lock)); if (call) { if (compl == RXRPC_CALL_LOCALLY_ABORTED) - trace_rxrpc_abort("CON", call->cid, + trace_rxrpc_abort(call->debug_id, + "CON", call->cid, call->call_id, 0, abort_code, error); if (rxrpc_set_call_completion(call, compl, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 6fc61400337f..2a868fdab0ae 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1307,21 +1307,21 @@ out_unlock: wrong_security: rcu_read_unlock(); - trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RXKADINCONSISTENCY, EBADMSG); skb->priority = RXKADINCONSISTENCY; goto post_abort; reupgrade: rcu_read_unlock(); - trace_rxrpc_abort("UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); goto protocol_error; bad_message_unlock: rcu_read_unlock(); bad_message: - trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); protocol_error: skb->priority = RX_PROTOCOL_ERROR; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 9d45d8b56744..7bff716e911e 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ 
-272,7 +272,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, unsigned int *_offset, unsigned int *_len) { unsigned int offset = sizeof(struct rxrpc_wire_header); - unsigned int len = *_len; + unsigned int len; int ret; u8 annotation = *_annotation; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 09f2a3e05221..8503f279b467 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -579,7 +579,8 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL); + call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL, + atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ _leave(" = %p\n", call); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a54fa7b8c217..0d78b58e1898 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -109,6 +109,42 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) } EXPORT_SYMBOL(__tcf_idr_release); +static size_t tcf_action_shared_attrs_size(const struct tc_action *act) +{ + u32 cookie_len = 0; + + if (act->act_cookie) + cookie_len = nla_total_size(act->act_cookie->len); + + return nla_total_size(0) /* action number nested */ + + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + + cookie_len /* TCA_ACT_COOKIE */ + + nla_total_size(0) /* TCA_ACT_STATS nested */ + /* TCA_STATS_BASIC */ + + nla_total_size_64bit(sizeof(struct gnet_stats_basic)) + /* TCA_STATS_QUEUE */ + + nla_total_size_64bit(sizeof(struct gnet_stats_queue)) + + nla_total_size(0) /* TCA_OPTIONS nested */ + + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */ +} + +static size_t tcf_action_full_attrs_size(size_t sz) +{ + return NLMSG_HDRLEN /* struct nlmsghdr */ + + sizeof(struct tcamsg) + + nla_total_size(0) /* TCA_ACT_TAB nested */ + + sz; +} + +static size_t tcf_action_fill_size(const struct tc_action *act) +{ + size_t sz = tcf_action_shared_attrs_size(act); + + if (act->ops->get_fill_size) + return act->ops->get_fill_size(act) + sz; + return sz; +} + static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { @@ -260,14 +296,6 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, } EXPORT_SYMBOL(tcf_idr_check); -void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est) -{ - if (est) - gen_kill_estimator(&a->tcfa_rate_est); - free_tcf(a); -} -EXPORT_SYMBOL(tcf_idr_cleanup); - int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, bool cpustats) @@ -741,10 +769,12 @@ static void cleanup_a(struct list_head *actions, int ovr) int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions, struct netlink_ext_ack *extack) + struct list_head *actions, size_t *attr_size, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; + size_t sz = 0; int err; int i; @@ -760,11 +790,14 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, goto err; } act->order = i; + sz += tcf_action_fill_size(act); if (ovr) act->tcfa_refcnt++; list_add_tail(&act->list, actions); } + *attr_size = tcf_action_full_attrs_size(sz); + /* Remove the temp refcnt which was necessary to protect against * destroying an 
existing action which was being replaced */ @@ -994,12 +1027,13 @@ err_out: static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, - u32 portid, struct netlink_ext_ack *extack) + u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { int ret; struct sk_buff *skb; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, + GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1032,6 +1066,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; + size_t attr_size = 0; LIST_HEAD(actions); ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); @@ -1053,13 +1088,16 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; + attr_size += tcf_action_fill_size(act); list_add_tail(&act->list, &actions); } + attr_size = tcf_action_full_attrs_size(attr_size); + if (event == RTM_GETACTION) ret = tcf_get_notify(net, portid, n, &actions, event, extack); else { /* delete */ - ret = tcf_del_notify(net, n, &actions, portid, extack); + ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack); if (ret) goto err; return ret; @@ -1072,12 +1110,13 @@ err: static int tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, - u32 portid, struct netlink_ext_ack *extack) + u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; int err = 0; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, + GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1099,15 +1138,16 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int ovr, struct netlink_ext_ack *extack) { + size_t attr_size = 0; int ret = 0; LIST_HEAD(actions); ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions, - extack); + &attr_size, extack); if (ret) return ret; - return tcf_add_notify(net, n, &actions, portid, extack); + return tcf_add_notify(net, n, &actions, portid, attr_size, extack); } static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; @@ -1493,7 +1533,6 @@ static struct pernet_operations tcf_action_net_ops = { .exit = tcf_action_net_exit, .id = &tcf_action_net_id, .size = sizeof(struct tcf_action_net), - .async = true, }; static int __init tc_action_init(void) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index da72e0cf2b1f..9092531d45d8 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -352,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return res; out: if (res == ACT_P_CREATED) - tcf_idr_cleanup(*act, est); + tcf_idr_release(*act, bind); return ret; } @@ -413,7 +413,6 @@ static struct pernet_operations bpf_net_ops = { .exit_batch = bpf_exit_net, .id = &bpf_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; static int __init bpf_init_module(void) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 371e5e4ab3e2..e4b880fa51fe 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -222,7 +222,6 @@ static struct pernet_operations connmark_net_ops = { .exit_batch = connmark_exit_net, .id = &connmark_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; static int __init connmark_init_module(void) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 1fb1f1f6a555..7e28b2ce1437 100644 --- 
a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -350,7 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl, { struct sctphdr *sctph; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) + if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) return 1; sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph)); @@ -626,7 +626,8 @@ static void tcf_csum_cleanup(struct tc_action *a) struct tcf_csum_params *params; params = rcu_dereference_protected(p->params, 1); - kfree_rcu(params, rcu); + if (params) + kfree_rcu(params, rcu); } static int tcf_csum_walker(struct net *net, struct sk_buff *skb, @@ -677,7 +678,6 @@ static struct pernet_operations csum_net_ops = { .exit_batch = csum_exit_net, .id = &csum_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; MODULE_DESCRIPTION("Checksum updating actions"); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 74563254e676..4dc4f153cad8 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -217,6 +217,19 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } +static size_t tcf_gact_get_fill_size(const struct tc_action *act) +{ + size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */ + +#ifdef CONFIG_GACT_PROB + if (to_gact(act)->tcfg_ptype) + /* TCA_GACT_PROB */ + sz += nla_total_size(sizeof(struct tc_gact_p)); +#endif + + return sz; +} + static struct tc_action_ops act_gact_ops = { .kind = "gact", .type = TCA_ACT_GACT, @@ -227,6 +240,7 @@ static struct tc_action_ops act_gact_ops = { .init = tcf_gact_init, .walk = tcf_gact_walker, .lookup = tcf_gact_search, + .get_fill_size = tcf_gact_get_fill_size, .size = sizeof(struct tcf_gact), }; @@ -247,7 +261,6 @@ static struct pernet_operations gact_net_ops = { .exit_batch = gact_exit_net, .id = &gact_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 555b1caeff72..a5994cf0512b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -870,7 +870,6 @@ static struct pernet_operations ife_net_ops = { .exit_batch = ife_exit_net, .id = &ife_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; static int __init ife_init_module(void) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 10866717f88e..14c312d7908f 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -80,9 +80,12 @@ static void ipt_destroy_target(struct xt_entry_target *t) static void tcf_ipt_release(struct tc_action *a) { struct tcf_ipt *ipt = to_ipt(a); - ipt_destroy_target(ipt->tcfi_t); + + if (ipt->tcfi_t) { + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_t); + } kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); } static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { @@ -187,7 +190,7 @@ err2: kfree(tname); err1: if (ret == ACT_P_CREATED) - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); return err; } @@ -349,7 +352,6 @@ static struct pernet_operations ipt_net_ops = { .exit_batch = ipt_exit_net, .id = &ipt_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; static int tcf_xt_walker(struct net *net, struct sk_buff *skb, @@ -400,7 +402,6 @@ static struct pernet_operations xt_net_ops = { .exit_batch = xt_exit_net, .id = &xt_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 64c86579c3d9..fd34015331ab 
100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -353,7 +353,6 @@ static struct pernet_operations mirred_net_ops = { .exit_batch = mirred_exit_net, .id = &mirred_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index b1bc757f6491..4b5848b6c252 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -323,7 +323,6 @@ static struct pernet_operations nat_net_ops = { .exit_batch = nat_exit_net, .id = &nat_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; MODULE_DESCRIPTION("Stateless NAT actions"); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 5e8cc8f63acd..8a925c72db5f 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -176,7 +176,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p = to_pedit(*a); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); kfree(keys_ex); return -ENOMEM; } @@ -465,7 +465,6 @@ static struct pernet_operations pedit_net_ops = { .exit_batch = pedit_exit_net, .id = &pedit_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 51fe4fe343f7..4e72bc2a0dfb 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -196,7 +196,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); return err; } @@ -347,7 +347,6 @@ static struct pernet_operations police_net_ops = { .exit_batch = police_exit_net, .id = &police_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; static int __init police_init_module(void) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 238dfd27e995..5db358497c9e 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -103,7 +103,8 @@ static void tcf_sample_cleanup(struct tc_action *a) psample_group = rtnl_dereference(s->psample_group); RCU_INIT_POINTER(s->psample_group, NULL); - psample_group_put(psample_group); + if (psample_group) + psample_group_put(psample_group); } static bool tcf_sample_dev_ok_push(struct net_device *dev) @@ -248,7 +249,6 @@ static struct pernet_operations sample_net_ops = { .exit_batch = sample_exit_net, .id = &sample_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; static int __init sample_init_module(void) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 91816d73f3f3..9618b4a83cee 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -121,7 +121,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, d = to_defact(*a); ret = alloc_defdata(d, defdata); if (ret < 0) { - tcf_idr_cleanup(*a, est); + tcf_idr_release(*a, bind); return ret; } d->tcf_action = parm->action; @@ -216,7 +216,6 @@ static struct pernet_operations simp_net_ops = { .exit_batch = simp_exit_net, .id = &simp_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 7971510fe61b..ddf69fc01bdf 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -253,7 +253,6 @@ static struct pernet_operations skbedit_net_ops = { .exit_batch = skbedit_exit_net, .id = &skbedit_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; MODULE_AUTHOR("Alexander Duyck, 
<alexander.h.duyck@intel.com>"); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index febec75f4f7a..bbcbdce732cc 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -152,7 +152,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, ASSERT_RTNL(); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { - if (ovr) + if (ret == ACT_P_CREATED) tcf_idr_release(*a, bind); return -ENOMEM; } @@ -190,7 +190,8 @@ static void tcf_skbmod_cleanup(struct tc_action *a) struct tcf_skbmod_params *p; p = rcu_dereference_protected(d->skbmod_p, 1); - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, @@ -278,7 +279,6 @@ static struct pernet_operations skbmod_net_ops = { .exit_batch = skbmod_exit_net, .id = &skbmod_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>"); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 9169b7e78ada..626dac81a48a 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -153,6 +153,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; break; default: + ret = -EINVAL; goto err_out; } @@ -207,11 +208,12 @@ static void tunnel_key_release(struct tc_action *a) struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, 1); + if (params) { + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(¶ms->tcft_enc_metadata->dst); - if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) - dst_release(¶ms->tcft_enc_metadata->dst); - - kfree_rcu(params, rcu); + kfree_rcu(params, rcu); + } } static int tunnel_key_dump_addresses(struct sk_buff *skb, @@ -337,7 +339,6 @@ static struct pernet_operations tunnel_key_net_ops = { .exit_batch = tunnel_key_exit_net, .id = &tunnel_key_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; static int __init tunnel_key_init_module(void) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index c2ee7fd51cc9..853604685965 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -117,7 +117,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct tc_vlan *parm; struct tcf_vlan *v; int action; - __be16 push_vid = 0; + u16 push_vid = 0; __be16 push_proto = 0; u8 push_prio = 0; bool exists = false; @@ -195,7 +195,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, ASSERT_RTNL(); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { - if (ovr) + if (ret == ACT_P_CREATED) tcf_idr_release(*a, bind); return -ENOMEM; } @@ -225,7 +225,8 @@ static void tcf_vlan_cleanup(struct tc_action *a) struct tcf_vlan_params *p; p = rcu_dereference_protected(v->vlan_p, 1); - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, @@ -313,7 +314,6 @@ static struct pernet_operations vlan_net_ops = { .exit_batch = vlan_exit_net, .id = &vlan_net_id, .size = sizeof(struct tc_action_net), - .async = true, }; static int __init vlan_init_module(void) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 19f9f421d5b7..b66754f52a9f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1433,6 +1433,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, #ifdef CONFIG_NET_CLS_ACT { struct tc_action *act; + size_t attr_size = 0; if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tp, 
tb[exts->police], @@ -1450,7 +1451,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - &actions, extack); + &actions, &attr_size, extack); if (err) return err; list_for_each_entry(act, &actions, list) @@ -1618,7 +1619,6 @@ static struct pernet_operations tcf_net_ops = { .exit = tcf_net_exit, .id = &tcf_net_id, .size = sizeof(struct tcf_net), - .async = true, }; static int __init tc_filter_init(void) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7d0ce2c40f93..d964e60c730e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -511,6 +511,9 @@ static int fl_set_key_flags(struct nlattr **tb, fl_set_key_flag(key, mask, flags_key, flags_mask, TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT); + fl_set_key_flag(key, mask, flags_key, flags_mask, + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + FLOW_DIS_FIRST_FRAG); return 0; } @@ -1130,6 +1133,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) fl_get_key_flag(flags_key, flags_mask, &key, &mask, TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT); + fl_get_key_flag(flags_key, flags_mask, &key, &mask, + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + FLOW_DIS_FIRST_FRAG); _key = cpu_to_be32(key); _mask = cpu_to_be32(mask); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 68f9d942bed4..106dae7e4818 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2133,7 +2133,6 @@ static void __net_exit psched_net_exit(struct net *net) static struct pernet_operations psched_net_ops = { .init = psched_net_init, .exit = psched_net_exit, - .async = true, }; static int __init pktsched_init(void) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 190570f21b20..7e3fbe9cc936 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -106,6 +106,14 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, __skb_queue_tail(&q->skb_bad_txq, skb); + if (qdisc_is_percpu_stats(q)) { + qdisc_qstats_cpu_backlog_inc(q, skb); + qdisc_qstats_cpu_qlen_inc(q); + } else { + qdisc_qstats_backlog_inc(q, skb); + q->q.qlen++; + } + if (lock) spin_unlock(lock); } @@ -196,14 +204,6 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, break; if (unlikely(skb_get_queue_mapping(nskb) != mapping)) { qdisc_enqueue_skb_bad_txq(q, nskb); - - if (qdisc_is_percpu_stats(q)) { - qdisc_qstats_cpu_backlog_inc(q, nskb); - qdisc_qstats_cpu_qlen_inc(q); - } else { - qdisc_qstats_backlog_inc(q, nskb); - q->q.qlen++; - } break; } skb->next = nskb; @@ -628,6 +628,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct skb_array *q = band2list(priv, band); + unsigned int pkt_len = qdisc_pkt_len(skb); int err; err = skb_array_produce(q, skb); @@ -636,7 +637,10 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, return qdisc_drop_cpu(skb, qdisc, to_free); qdisc_qstats_cpu_qlen_inc(qdisc); - qdisc_qstats_cpu_backlog_inc(qdisc, skb); + /* Note: skb can not be used after skb_array_produce(), + * so we better not use qdisc_qstats_cpu_backlog_inc() + */ + this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len); return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 7c179addebcd..7d6801fc5340 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -509,7 +509,7 @@ static int 
netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (unlikely(sch->q.qlen >= sch->limit)) - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_all(skb, sch, to_free); qdisc_qstats_backlog_inc(sch, skb); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 229172d509cc..03225a8df973 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -188,7 +188,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size) + if (skb_is_gso(skb) && + skb_gso_validate_mac_len(skb, q->max_size)) return tbf_segment(skb, sch, to_free); return qdisc_drop(skb, sch, to_free); } diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 00667c50efa7..e64630cd3331 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -101,13 +101,14 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp) return NULL; INIT_LIST_HEAD(&new->key_list); + refcount_set(&new->refcnt, 1); new->key_id = key_id; return new; } /* Free the shared key structure */ -static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) +static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key) { BUG_ON(!list_empty(&sh_key->key_list)); sctp_auth_key_put(sh_key->key); @@ -115,6 +116,17 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) kfree(sh_key); } +void sctp_auth_shkey_release(struct sctp_shared_key *sh_key) +{ + if (refcount_dec_and_test(&sh_key->refcnt)) + sctp_auth_shkey_destroy(sh_key); +} + +void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key) +{ + refcount_inc(&sh_key->refcnt); +} + /* Destroy the entire key list. This is done during the * associon and endpoint free process. */ @@ -128,7 +140,7 @@ void sctp_auth_destroy_keys(struct list_head *keys) key_for_each_safe(ep_key, tmp, keys) { list_del_init(&ep_key->key_list); - sctp_auth_shkey_free(ep_key); + sctp_auth_shkey_release(ep_key); } } @@ -409,13 +421,19 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) sctp_auth_key_put(asoc->asoc_shared_key); asoc->asoc_shared_key = secret; + asoc->shkey = ep_key; /* Update send queue in case any chunk already in there now * needs authenticating */ list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) { - if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) + if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) { chunk->auth = 1; + if (!chunk->shkey) { + chunk->shkey = asoc->shkey; + sctp_auth_shkey_hold(chunk->shkey); + } + } } return 0; @@ -431,8 +449,11 @@ struct sctp_shared_key *sctp_auth_get_shkey( /* First search associations set of endpoint pair shared keys */ key_for_each(key, &asoc->endpoint_shared_keys) { - if (key->key_id == key_id) - return key; + if (key->key_id == key_id) { + if (!key->deactivated) + return key; + break; + } } return NULL; @@ -703,16 +724,15 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc) * after the AUTH chunk in the SCTP packet. 
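
The refcounting introduced above follows the usual pointer-pins-a-reference discipline: whoever stores a pointer to a shared key takes a hold and drops it when the pointer dies. A minimal sketch, not taken from the patch, of attaching and detaching a key under the new rules:

static void sketch_attach_shkey(struct sctp_chunk *chunk,
				struct sctp_shared_key *shkey)
{
	sctp_auth_shkey_hold(shkey);	/* pin for the chunk's lifetime */
	chunk->shkey = shkey;
}

static void sketch_detach_shkey(struct sctp_chunk *chunk)
{
	if (chunk->shkey) {
		sctp_auth_shkey_release(chunk->shkey);	/* may free the key */
		chunk->shkey = NULL;
	}
}
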
*/ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, - struct sk_buff *skb, - struct sctp_auth_chunk *auth, - gfp_t gfp) + struct sk_buff *skb, struct sctp_auth_chunk *auth, + struct sctp_shared_key *ep_key, gfp_t gfp) { - struct crypto_shash *tfm; struct sctp_auth_bytes *asoc_key; + struct crypto_shash *tfm; __u16 key_id, hmac_id; - __u8 *digest; unsigned char *end; int free_key = 0; + __u8 *digest; /* Extract the info we need: * - hmac id @@ -724,12 +744,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, if (key_id == asoc->active_key_id) asoc_key = asoc->asoc_shared_key; else { - struct sctp_shared_key *ep_key; - - ep_key = sctp_auth_get_shkey(asoc, key_id); - if (!ep_key) - return; - + /* ep_key can't be NULL here */ asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp); if (!asoc_key) return; @@ -829,7 +844,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, struct sctp_association *asoc, struct sctp_authkey *auth_key) { - struct sctp_shared_key *cur_key = NULL; + struct sctp_shared_key *cur_key, *shkey; struct sctp_auth_bytes *key; struct list_head *sh_keys; int replace = 0; @@ -842,46 +857,34 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, else sh_keys = &ep->endpoint_shared_keys; - key_for_each(cur_key, sh_keys) { - if (cur_key->key_id == auth_key->sca_keynumber) { + key_for_each(shkey, sh_keys) { + if (shkey->key_id == auth_key->sca_keynumber) { replace = 1; break; } } - /* If we are not replacing a key id, we need to allocate - * a shared key. - */ - if (!replace) { - cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, - GFP_KERNEL); - if (!cur_key) - return -ENOMEM; - } + cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL); + if (!cur_key) + return -ENOMEM; /* Create a new key data based on the info passed in */ key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL); - if (!key) - goto nomem; + if (!key) { + kfree(cur_key); + return -ENOMEM; + } memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); + cur_key->key = key; - /* If we are replacing, remove the old keys data from the - * key id. If we are adding new key id, add it to the - * list. 
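
With the rewritten replace path below, install and replace look identical from userspace: setting an already-used key number swaps in a fresh key entry atomically. A hedged userspace sketch (set_auth_key is an invented helper):

#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/sctp.h>

static int set_auth_key(int fd, sctp_assoc_t assoc_id,
			const unsigned char *key, __u16 keylen)
{
	struct sctp_authkey *ak;
	int ret;

	ak = malloc(sizeof(*ak) + keylen);
	if (!ak)
		return -1;
	ak->sca_assoc_id = assoc_id;
	ak->sca_keynumber = 1;		/* reusing an ID replaces the key */
	ak->sca_keylength = keylen;
	memcpy(ak->sca_key, key, keylen);
	ret = setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_KEY,
			 ak, sizeof(*ak) + keylen);
	free(ak);
	return ret;
}
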
- */ - if (replace) - sctp_auth_key_put(cur_key->key); - else - list_add(&cur_key->key_list, sh_keys); + if (replace) { + list_del_init(&shkey->key_list); + sctp_auth_shkey_release(shkey); + } + list_add(&cur_key->key_list, sh_keys); - cur_key->key = key; return 0; -nomem: - if (!replace) - sctp_auth_shkey_free(cur_key); - - return -ENOMEM; } int sctp_auth_set_active_key(struct sctp_endpoint *ep, @@ -905,7 +908,7 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep, } } - if (!found) + if (!found || key->deactivated) return -EINVAL; if (asoc) { @@ -952,7 +955,58 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep, /* Delete the shared key */ list_del_init(&key->key_list); - sctp_auth_shkey_free(key); + sctp_auth_shkey_release(key); + + return 0; +} + +int sctp_auth_deact_key_id(struct sctp_endpoint *ep, + struct sctp_association *asoc, __u16 key_id) +{ + struct sctp_shared_key *key; + struct list_head *sh_keys; + int found = 0; + + /* The key identifier MUST NOT be the current active key + * The key identifier MUST correspond to an existing key + */ + if (asoc) { + if (asoc->active_key_id == key_id) + return -EINVAL; + + sh_keys = &asoc->endpoint_shared_keys; + } else { + if (ep->active_key_id == key_id) + return -EINVAL; + + sh_keys = &ep->endpoint_shared_keys; + } + + key_for_each(key, sh_keys) { + if (key->key_id == key_id) { + found = 1; + break; + } + } + + if (!found) + return -EINVAL; + + /* refcnt == 1 and !list_empty mean it's not being used anywhere + * and deactivated will be set, so it's time to notify userland + * that this shkey can be freed. + */ + if (asoc && !list_empty(&key->key_list) && + refcount_read(&key->refcnt) == 1) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, key->key_id, + SCTP_AUTH_FREE_KEY, GFP_KERNEL); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + + key->deactivated = 1; return 0; } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 991a530c6b31..f889a84f264d 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -168,6 +168,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, { size_t len, first_len, max_data, remaining; size_t msg_len = iov_iter_count(from); + struct sctp_shared_key *shkey = NULL; struct list_head *pos, *temp; struct sctp_chunk *chunk; struct sctp_datamsg *msg; @@ -204,6 +205,17 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (hmac_desc) max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) + hmac_desc->hmac_len); + + if (sinfo->sinfo_tsn && + sinfo->sinfo_ssn != asoc->active_key_id) { + shkey = sctp_auth_get_shkey(asoc, sinfo->sinfo_ssn); + if (!shkey) { + err = -EINVAL; + goto errout; + } + } else { + shkey = asoc->shkey; + } } /* Check what's our max considering the above */ @@ -275,6 +287,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (err < 0) goto errout_chunk_free; + chunk->shkey = shkey; + /* Put the chunk->skb back into the form expected by send. */ __skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr - chunk->skb->data); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8b3146816519..e2f5a3ee41a7 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -349,8 +349,8 @@ out: /* Look for any peeled off association from the endpoint that matches the * given peer address.
*/ -int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, - const union sctp_addr *paddr) +bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, + const union sctp_addr *paddr) { struct sctp_sockaddr_entry *addr; struct sctp_bind_addr *bp; @@ -362,10 +362,10 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, */ list_for_each_entry(addr, &bp->address_list, list) { if (sctp_has_association(net, &addr->a, paddr)) - return 1; + return true; } - return 0; + return false; } /* Do delayed input processing. This is scheduled by sctp_rcv(). diff --git a/net/sctp/input.c b/net/sctp/input.c index 0247cc432e02..ba8a6e6c36fa 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -106,6 +106,7 @@ int sctp_rcv(struct sk_buff *skb) int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); + bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb); if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -123,8 +124,7 @@ int sctp_rcv(struct sk_buff *skb) * it's better to just linearize it otherwise crc computing * takes longer. */ - if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && - skb_linearize(skb)) || + if ((!is_gso && skb_linearize(skb)) || !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; @@ -135,7 +135,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb_csum_unnecessary(skb)) __skb_decr_checksum_unnecessary(skb); else if (!sctp_checksum_disable && - !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && + !is_gso && sctp_rcv_checksum(net, skb) < 0) goto discard_it; skb->csum_valid = 1; @@ -1010,19 +1010,18 @@ struct sctp_association *sctp_lookup_association(struct net *net, } /* Is there an association matching the given local and peer addresses? */ -int sctp_has_association(struct net *net, - const union sctp_addr *laddr, - const union sctp_addr *paddr) +bool sctp_has_association(struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr) { - struct sctp_association *asoc; struct sctp_transport *transport; - if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) { + if (sctp_lookup_association(net, laddr, paddr, &transport)) { sctp_transport_put(transport); - return 1; + return true; } - return 0; + return false; } /* @@ -1218,7 +1217,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, * issue as packets hitting this are mostly INIT or INIT-ACK and * those cannot be on GSO-style anyway. */ - if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) + if (skb_is_gso(skb) && skb_is_gso_sctp(skb)) return NULL; ch = (struct sctp_chunkhdr *)skb->data; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 48392552ee7c..23ebc5318edc 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -170,7 +170,7 @@ next_chunk: chunk = list_entry(entry, struct sctp_chunk, list); - if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) { + if (skb_is_gso(chunk->skb) && skb_is_gso_sctp(chunk->skb)) { /* GSO-marked skbs but without frags, handle * them normally */ diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index aeea6da81441..fd2684ad94c8 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -130,11 +130,3 @@ void sctp_dbg_objcnt_init(struct net *net) if (!ent) pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } - -/* Cleanup the objcount entry in the proc filesystem. 
*/ -void sctp_dbg_objcnt_exit(struct net *net) -{ - remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp); -} - - diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 35bc7106d182..123e9f2dc226 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -45,7 +45,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + if (!skb_is_gso_sctp(skb)) goto out; sh = sctp_hdr(skb); diff --git a/net/sctp/output.c b/net/sctp/output.c index 01a26ee051e3..d6e1c90cc09a 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -241,10 +241,13 @@ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt, if (!chunk->auth) return retval; - auth = sctp_make_auth(asoc); + auth = sctp_make_auth(asoc, chunk->shkey->key_id); if (!auth) return retval; + auth->shkey = chunk->shkey; + sctp_auth_shkey_hold(auth->shkey); + retval = __sctp_packet_append_chunk(pkt, auth); if (retval != SCTP_XMIT_OK) @@ -490,7 +493,8 @@ merge: } if (auth) { - sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp); + sctp_auth_calculate_hmac(tp->asoc, nskb, auth, + packet->auth->shkey, gfp); /* free auth if no more chunks, or add it back */ if (list_empty(&packet->chunk_list)) sctp_chunk_free(packet->auth); @@ -770,6 +774,16 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, enum sctp_xmit retval = SCTP_XMIT_OK; size_t psize, pmtu, maxsize; + /* Don't bundle in this packet if this chunk's auth key doesn't + * match other chunks already enqueued on this packet. Also, + * don't bundle the chunk with auth key if other chunks in this + * packet don't have auth key. + */ + if ((packet->auth && chunk->shkey != packet->auth->shkey) || + (!packet->auth && chunk->shkey && + chunk->chunk_hdr->type != SCTP_CID_AUTH)) + return SCTP_XMIT_PMTU_FULL; + psize = packet->size; if (packet->transport->asoc) pmtu = packet->transport->asoc->pathmtu; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 537545ebcb0e..1d9ccc6dab2b 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -101,25 +101,6 @@ static const struct file_operations sctp_snmp_seq_fops = { .release = single_release_net, }; -/* Set up the proc fs entry for 'snmp' object. */ -int __net_init sctp_snmp_proc_init(struct net *net) -{ - struct proc_dir_entry *p; - - p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_snmp_seq_fops); - if (!p) - return -ENOMEM; - - return 0; -} - -/* Cleanup the proc fs entry for 'snmp' object. */ -void sctp_snmp_proc_exit(struct net *net) -{ - remove_proc_entry("snmp", net->sctp.proc_net_sctp); -} - /* Dump local addresses of an association/endpoint. */ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb) { @@ -259,25 +240,6 @@ static const struct file_operations sctp_eps_seq_fops = { .release = seq_release_net, }; -/* Set up the proc fs entry for 'eps' object. */ -int __net_init sctp_eps_proc_init(struct net *net) -{ - struct proc_dir_entry *p; - - p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_eps_seq_fops); - if (!p) - return -ENOMEM; - - return 0; -} - -/* Cleanup the proc fs entry for 'eps' object. 
*/ -void sctp_eps_proc_exit(struct net *net) -{ - remove_proc_entry("eps", net->sctp.proc_net_sctp); -} - struct sctp_ht_iter { struct seq_net_private p; struct rhashtable_iter hti; @@ -390,25 +352,6 @@ static const struct file_operations sctp_assocs_seq_fops = { .release = seq_release_net, }; -/* Set up the proc fs entry for 'assocs' object. */ -int __net_init sctp_assocs_proc_init(struct net *net) -{ - struct proc_dir_entry *p; - - p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_assocs_seq_fops); - if (!p) - return -ENOMEM; - - return 0; -} - -/* Cleanup the proc fs entry for 'assocs' object. */ -void sctp_assocs_proc_exit(struct net *net) -{ - remove_proc_entry("assocs", net->sctp.proc_net_sctp); -} - static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { struct sctp_association *assoc; @@ -488,12 +431,6 @@ static const struct seq_operations sctp_remaddr_ops = { .show = sctp_remaddr_seq_show, }; -/* Cleanup the proc fs entry for 'remaddr' object. */ -void sctp_remaddr_proc_exit(struct net *net) -{ - remove_proc_entry("remaddr", net->sctp.proc_net_sctp); -} - static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &sctp_remaddr_ops, @@ -507,13 +444,28 @@ static const struct file_operations sctp_remaddr_seq_fops = { .release = seq_release_net, }; -int __net_init sctp_remaddr_proc_init(struct net *net) +/* Set up the proc fs entry for the SCTP protocol. */ +int __net_init sctp_proc_init(struct net *net) { - struct proc_dir_entry *p; - - p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_remaddr_seq_fops); - if (!p) + net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); + if (!net->sctp.proc_net_sctp) return -ENOMEM; + if (!proc_create("snmp", 0444, net->sctp.proc_net_sctp, + &sctp_snmp_seq_fops)) + goto cleanup; + if (!proc_create("eps", 0444, net->sctp.proc_net_sctp, + &sctp_eps_seq_fops)) + goto cleanup; + if (!proc_create("assocs", 0444, net->sctp.proc_net_sctp, + &sctp_assocs_seq_fops)) + goto cleanup; + if (!proc_create("remaddr", 0444, net->sctp.proc_net_sctp, + &sctp_remaddr_seq_fops)) + goto cleanup; return 0; + +cleanup: + remove_proc_subtree("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; + return -ENOMEM; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 91813e686c67..a24cde236330 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -80,56 +80,6 @@ long sysctl_sctp_mem[3]; int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; -/* Set up the proc fs entry for the SCTP protocol. */ -static int __net_init sctp_proc_init(struct net *net) -{ -#ifdef CONFIG_PROC_FS - net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); - if (!net->sctp.proc_net_sctp) - goto out_proc_net_sctp; - if (sctp_snmp_proc_init(net)) - goto out_snmp_proc_init; - if (sctp_eps_proc_init(net)) - goto out_eps_proc_init; - if (sctp_assocs_proc_init(net)) - goto out_assocs_proc_init; - if (sctp_remaddr_proc_init(net)) - goto out_remaddr_proc_init; - - return 0; - -out_remaddr_proc_init: - sctp_assocs_proc_exit(net); -out_assocs_proc_init: - sctp_eps_proc_exit(net); -out_eps_proc_init: - sctp_snmp_proc_exit(net); -out_snmp_proc_init: - remove_proc_entry("sctp", net->proc_net); - net->sctp.proc_net_sctp = NULL; -out_proc_net_sctp: - return -ENOMEM; -#endif /* CONFIG_PROC_FS */ - return 0; -} - -/* Clean up the proc fs entry for the SCTP protocol. - * Note: Do not make this __exit as it is used in the init error - * path. 
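
The consolidated init above leans on remove_proc_subtree(), so a single call unwinds every entry on any failure. The same shape, stripped to a skeleton with invented names ("foo", foo_stats_fops):

static const struct file_operations foo_stats_fops;	/* assumed defined elsewhere */

static int __net_init foo_proc_init(struct net *net)
{
	struct proc_dir_entry *dir;

	dir = proc_net_mkdir(net, "foo", net->proc_net);
	if (!dir)
		return -ENOMEM;
	if (!proc_create("stats", 0444, dir, &foo_stats_fops))
		goto cleanup;
	if (!proc_create("peers", 0444, dir, &foo_stats_fops))
		goto cleanup;
	return 0;

cleanup:
	/* tears down "foo" together with whatever was created under it */
	remove_proc_subtree("foo", net->proc_net);
	return -ENOMEM;
}
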
- */ -static void sctp_proc_exit(struct net *net) -{ -#ifdef CONFIG_PROC_FS - sctp_snmp_proc_exit(net); - sctp_eps_proc_exit(net); - sctp_assocs_proc_exit(net); - sctp_remaddr_proc_exit(net); - - remove_proc_entry("sctp", net->proc_net); - net->sctp.proc_net_sctp = NULL; -#endif -} - /* Private helper to extract ipv4 address and stash them in * the protocol structure. */ @@ -1285,10 +1235,12 @@ static int __net_init sctp_defaults_init(struct net *net) if (status) goto err_init_mibs; +#ifdef CONFIG_PROC_FS /* Initialize proc fs directory. */ status = sctp_proc_init(net); if (status) goto err_init_proc; +#endif sctp_dbg_objcnt_init(net); @@ -1306,8 +1258,10 @@ static int __net_init sctp_defaults_init(struct net *net) return 0; +#ifdef CONFIG_PROC_FS err_init_proc: cleanup_sctp_mibs(net); +#endif err_init_mibs: sctp_sysctl_net_unregister(net); err_sysctl_register: @@ -1320,9 +1274,10 @@ static void __net_exit sctp_defaults_exit(struct net *net) sctp_free_addr_wq(net); sctp_free_local_addr_list(net); - sctp_dbg_objcnt_exit(net); - - sctp_proc_exit(net); +#ifdef CONFIG_PROC_FS + remove_proc_subtree("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; +#endif cleanup_sctp_mibs(net); sctp_sysctl_net_unregister(net); } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d01475f5f710..cc20bc39ee7c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -87,7 +87,28 @@ static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, /* Control chunk destructor */ static void sctp_control_release_owner(struct sk_buff *skb) { - /*TODO: do memory release */ + struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg; + + if (chunk->shkey) { + struct sctp_shared_key *shkey = chunk->shkey; + struct sctp_association *asoc = chunk->asoc; + + /* refcnt == 2 and !list_empty mean after this release, it's + * not being used anywhere, and it's time to notify userland + * that this shkey can be freed if it's been deactivated. + */ + if (shkey->deactivated && !list_empty(&shkey->key_list) && + refcount_read(&shkey->refcnt) == 2) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id, + SCTP_AUTH_FREE_KEY, + GFP_KERNEL); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + sctp_auth_shkey_release(chunk->shkey); + } } static void sctp_control_set_owner_w(struct sctp_chunk *chunk) @@ -102,7 +123,12 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk) * * For now don't do anything for now. */ + if (chunk->auth) { + chunk->shkey = asoc->shkey; + sctp_auth_shkey_hold(chunk->shkey); + } skb->sk = asoc ? 
asoc->base.sk : NULL; + skb_shinfo(skb)->destructor_arg = chunk; skb->destructor = sctp_control_release_owner; } @@ -1271,7 +1297,8 @@ nodata: return retval; } -struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) +struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc, + __u16 key_id) { struct sctp_authhdr auth_hdr; struct sctp_hmac *hmac_desc; @@ -1289,7 +1316,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) return NULL; auth_hdr.hmac_id = htons(hmac_desc->hmac_id); - auth_hdr.shkey_id = htons(asoc->active_key_id); + auth_hdr.shkey_id = htons(key_id); retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr), &auth_hdr); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b71e7fb0a20a..298112ca8c06 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1049,6 +1049,16 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands, asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } +static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands, + struct sctp_association *asoc) +{ + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); +} + /* Helper function to generate an adaptation indication event */ static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands, struct sctp_association *asoc) @@ -1755,6 +1765,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, case SCTP_CMD_ADAPTATION_IND: sctp_cmd_adaptation_ind(commands, asoc); break; + case SCTP_CMD_PEER_NO_AUTH: + sctp_cmd_peer_no_auth(commands, asoc); + break; case SCTP_CMD_ASSOC_SHKEY: error = sctp_auth_asoc_init_active_key(asoc, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index eb7905ffe5f2..cc56a67dbb4d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -659,7 +659,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, void *arg, struct sctp_cmd_seq *commands) { - struct sctp_ulpevent *ev, *ai_ev = NULL; + struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL; struct sctp_association *new_asoc; struct sctp_init_chunk *peer_init; struct sctp_chunk *chunk = arg; @@ -820,6 +820,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, goto nomem_aiev; } + if (!new_asoc->peer.auth_capable) { + auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0, + SCTP_AUTH_NO_AUTH, + GFP_ATOMIC); + if (!auth_ev) + goto nomem_authev; + } + /* Add all the state machine commands now since we've created * everything. 
This way we don't introduce memory corruptions * during side-effect processing and correclty count established @@ -847,8 +855,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); + if (auth_ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(auth_ev)); + return SCTP_DISPOSITION_CONSUME; +nomem_authev: + sctp_ulpevent_free(ai_ev); nomem_aiev: sctp_ulpevent_free(ev); nomem_ev: @@ -953,6 +967,15 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, SCTP_ULPEVENT(ev)); } + if (!asoc->peer.auth_capable) { + ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, + GFP_ATOMIC); + if (!ev) + goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); + } + return SCTP_DISPOSITION_CONSUME; nomem: return SCTP_DISPOSITION_NOMEM; @@ -1908,6 +1931,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b( if (asoc->peer.adaptation_ind) sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL()); + if (!asoc->peer.auth_capable) + sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL()); + return SCTP_DISPOSITION_CONSUME; nomem: @@ -1954,7 +1980,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { - struct sctp_ulpevent *ev = NULL, *ai_ev = NULL; + struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, *auth_ev = NULL; struct sctp_chunk *repl; /* Clarification from Implementor's Guide: @@ -2001,6 +2027,14 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( goto nomem; } + + if (!asoc->peer.auth_capable) { + auth_ev = sctp_ulpevent_make_authkey(asoc, 0, + SCTP_AUTH_NO_AUTH, + GFP_ATOMIC); + if (!auth_ev) + goto nomem; + } } repl = sctp_make_cookie_ack(new_asoc, chunk); @@ -2015,10 +2049,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( if (ai_ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); + if (auth_ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(auth_ev)); return SCTP_DISPOSITION_CONSUME; nomem: + if (auth_ev) + sctp_ulpevent_free(auth_ev); if (ai_ev) sctp_ulpevent_free(ai_ev); if (ev) @@ -4114,6 +4153,7 @@ static enum sctp_ierror sctp_sf_authenticate( const union sctp_subtype type, struct sctp_chunk *chunk) { + struct sctp_shared_key *sh_key = NULL; struct sctp_authhdr *auth_hdr; __u8 *save_digest, *digest; struct sctp_hmac *hmac; @@ -4135,9 +4175,11 @@ static enum sctp_ierror sctp_sf_authenticate( * configured */ key_id = ntohs(auth_hdr->shkey_id); - if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id)) - return SCTP_IERROR_AUTH_BAD_KEYID; - + if (key_id != asoc->active_key_id) { + sh_key = sctp_auth_get_shkey(asoc, key_id); + if (!sh_key) + return SCTP_IERROR_AUTH_BAD_KEYID; + } /* Make sure that the length of the signature matches what * we expect. 
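
Taken together, the new SCTP_AUTH_NO_AUTH and SCTP_AUTH_FREE_KEY indications give userland a complete view of key lifetime. A hedged sketch of the receiving side, assuming headers new enough to carry these constants and a socket already subscribed to authentication events:

#include <stdio.h>
#include <netinet/sctp.h>

static void handle_auth_event(const union sctp_notification *notif)
{
	const struct sctp_authkey_event *ev = &notif->sn_authkey_event;

	switch (ev->auth_indication) {
	case SCTP_AUTH_NEW_KEY:		/* renamed from SCTP_AUTH_NEWKEY */
		printf("peer switched to key %u\n", ev->auth_keynumber);
		break;
	case SCTP_AUTH_FREE_KEY:
		printf("key %u fully released, safe to delete\n",
		       ev->auth_keynumber);
		break;
	case SCTP_AUTH_NO_AUTH:
		printf("peer does not support SCTP-AUTH\n");
		break;
	}
}
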
@@ -4166,7 +4208,7 @@ static enum sctp_ierror sctp_sf_authenticate( sctp_auth_calculate_hmac(asoc, chunk->skb, (struct sctp_auth_chunk *)chunk->chunk_hdr, - GFP_ATOMIC); + sh_key, GFP_ATOMIC); /* Discard the packet if the digests do not match */ if (memcmp(save_digest, digest, sig_len)) { @@ -4243,7 +4285,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net, struct sctp_ulpevent *ev; ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id), - SCTP_AUTH_NEWKEY, GFP_ATOMIC); + SCTP_AUTH_NEW_KEY, GFP_ATOMIC); if (!ev) return -ENOMEM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7fa76031bb08..7a10ae3c3d82 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -156,6 +156,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) /* The sndbuf space is tracked per association. */ sctp_association_hold(asoc); + if (chunk->shkey) + sctp_auth_shkey_hold(chunk->shkey); + skb_set_owner_w(chunk->skb, sk); chunk->skb->destructor = sctp_wfree; @@ -1644,6 +1647,12 @@ static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs, srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id; } + if (cmsgs->prinfo) { + srinfo->sinfo_timetolive = cmsgs->prinfo->pr_value; + SCTP_PR_SET_POLICY(srinfo->sinfo_flags, + cmsgs->prinfo->pr_policy); + } + sflags = srinfo->sinfo_flags; if (!sflags && msg_len) return 0; @@ -1670,7 +1679,8 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, struct net *net = sock_net(sk); struct sctp_association *asoc; enum sctp_scope scope; - int err = -EINVAL; + struct cmsghdr *cmsg; + int err; *tp = NULL; @@ -1735,6 +1745,71 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, goto free; } + if (!cmsgs->addrs_msg) + return 0; + + /* sendv addr list parse */ + for_each_cmsghdr(cmsg, cmsgs->addrs_msg) { + struct sctp_transport *transport; + struct sctp_association *old; + union sctp_addr _daddr; + int dlen; + + if (cmsg->cmsg_level != IPPROTO_SCTP || + (cmsg->cmsg_type != SCTP_DSTADDRV4 && + cmsg->cmsg_type != SCTP_DSTADDRV6)) + continue; + + daddr = &_daddr; + memset(daddr, 0, sizeof(*daddr)); + dlen = cmsg->cmsg_len - sizeof(struct cmsghdr); + if (cmsg->cmsg_type == SCTP_DSTADDRV4) { + if (dlen < sizeof(struct in_addr)) { + err = -EINVAL; + goto free; + } + + dlen = sizeof(struct in_addr); + daddr->v4.sin_family = AF_INET; + daddr->v4.sin_port = htons(asoc->peer.port); + memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen); + } else { + if (dlen < sizeof(struct in6_addr)) { + err = -EINVAL; + goto free; + } + + dlen = sizeof(struct in6_addr); + daddr->v6.sin6_family = AF_INET6; + daddr->v6.sin6_port = htons(asoc->peer.port); + memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen); + } + err = sctp_verify_addr(sk, daddr, sizeof(*daddr)); + if (err) + goto free; + + old = sctp_endpoint_lookup_assoc(ep, daddr, &transport); + if (old && old != asoc) { + if (old->state >= SCTP_STATE_ESTABLISHED) + err = -EISCONN; + else + err = -EALREADY; + goto free; + } + + if (sctp_endpoint_is_peeled_off(ep, daddr)) { + err = -EADDRNOTAVAIL; + goto free; + } + + transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, + SCTP_UNKNOWN); + if (!transport) { + err = -ENOMEM; + goto free; + } + } + return 0; free: @@ -1752,6 +1827,10 @@ static int sctp_sendmsg_check_sflags(struct sctp_association *asoc, if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) return -EPIPE; + if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP) && + !sctp_state(asoc, ESTABLISHED)) + return 0; + if (sflags & SCTP_EOF) { pr_debug("%s: shutting down association:%p\n", 
__func__, asoc); sctp_primitive_SHUTDOWN(net, asoc, NULL); @@ -1804,6 +1883,19 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, goto err; } + if (asoc->pmtu_pending) + sctp_assoc_pending_pmtu(asoc); + + if (sctp_wspace(asoc) < msg_len) + sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); + + if (!sctp_wspace(asoc)) { + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); + if (err) + goto err; + } + if (sctp_state(asoc, CLOSED)) { err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err) @@ -1821,19 +1913,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, pr_debug("%s: we associated primitively\n", __func__); } - if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(asoc); - - if (sctp_wspace(asoc) < msg_len) - sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); - - if (!sctp_wspace(asoc)) { - timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); - if (err) - goto err; - } - datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter); if (IS_ERR(datamsg)) { err = PTR_ERR(datamsg); @@ -1901,10 +1980,21 @@ static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc, sinfo->sinfo_ppid = asoc->default_ppid; sinfo->sinfo_context = asoc->default_context; sinfo->sinfo_assoc_id = sctp_assoc2id(asoc); + + if (!cmsgs->prinfo) + sinfo->sinfo_flags = asoc->default_flags; } - if (!cmsgs->srinfo) + if (!cmsgs->srinfo && !cmsgs->prinfo) sinfo->sinfo_timetolive = asoc->default_timetolive; + + if (cmsgs->authinfo) { + /* Reuse sinfo_tsn to indicate that authinfo was set and + * sinfo_ssn to save the keyid on tx path. + */ + sinfo->sinfo_tsn = 1; + sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber; + } } static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) @@ -1936,6 +2026,29 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) lock_sock(sk); + /* SCTP_SENDALL process */ + if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + err = sctp_sendmsg_check_sflags(asoc, sflags, msg, + msg_len); + if (err == 0) + continue; + if (err < 0) + goto out_unlock; + + sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs); + + err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, + NULL, sinfo); + if (err < 0) + goto out_unlock; + + iov_iter_revert(&msg->msg_iter, err); + } + + goto out_unlock; + } + /* Get and check or create asoc */ if (daddr) { asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport); @@ -3534,6 +3647,33 @@ static int sctp_setsockopt_del_key(struct sock *sk, } /* + * 8.3.4 Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY) + * + * This set option will deactivate a shared secret key. 
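
For illustration, deactivating a shared key from userspace with the option documented here would look roughly as follows; a sketch assuming the SCTP_AUTH_DEACTIVATE_KEY constant from this series and an SCTP socket fd. The kernel-side handler follows below.

#include <stdint.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/sctp.h>

static int deactivate_key(int fd, sctp_assoc_t assoc_id, uint16_t keynum)
{
	struct sctp_authkeyid val = {
		.scact_assoc_id  = assoc_id,
		.scact_keynumber = keynum,
	};

	/* After this, the key may no longer be used for sending; once no
	 * queued chunk still holds it, SCTP_AUTH_FREE_KEY is reported and
	 * SCTP_AUTH_DELETE_KEY can finally remove it (see sctp_wfree below).
	 */
	return setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_DEACTIVATE_KEY,
			  &val, sizeof(val));
}
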
+ */ +static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, + unsigned int optlen) +{ + struct sctp_endpoint *ep = sctp_sk(sk)->ep; + struct sctp_authkeyid val; + struct sctp_association *asoc; + + if (!ep->auth_enable) + return -EACCES; + + if (optlen != sizeof(struct sctp_authkeyid)) + return -EINVAL; + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + + asoc = sctp_id2assoc(sk, val.scact_assoc_id); + if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); +} + +/* * 8.1.23 SCTP_AUTO_ASCONF * * This option will enable or disable the use of the automatic generation of @@ -4125,6 +4265,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_AUTH_DELETE_KEY: retval = sctp_setsockopt_del_key(sk, optval, optlen); break; + case SCTP_AUTH_DEACTIVATE_KEY: + retval = sctp_setsockopt_deactivate_key(sk, optval, optlen); + break; case SCTP_AUTO_ASCONF: retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); break; @@ -7099,6 +7242,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_AUTH_KEY: case SCTP_AUTH_CHUNK: case SCTP_AUTH_DELETE_KEY: + case SCTP_AUTH_DEACTIVATE_KEY: retval = -EOPNOTSUPP; break; case SCTP_HMAC_IDENT: @@ -7721,8 +7865,8 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs) if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | - SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK | - SCTP_ABORT | SCTP_EOF)) + SCTP_SACK_IMMEDIATELY | SCTP_SENDALL | + SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; @@ -7745,10 +7889,60 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs) if (cmsgs->sinfo->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | - SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK | - SCTP_ABORT | SCTP_EOF)) + SCTP_SACK_IMMEDIATELY | SCTP_SENDALL | + SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; + case SCTP_PRINFO: + /* SCTP Socket API Extension + * 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo + */ + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_prinfo))) + return -EINVAL; + + cmsgs->prinfo = CMSG_DATA(cmsg); + if (cmsgs->prinfo->pr_policy & ~SCTP_PR_SCTP_MASK) + return -EINVAL; + + if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE) + cmsgs->prinfo->pr_value = 0; + break; + case SCTP_AUTHINFO: + /* SCTP Socket API Extension + * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo + */ + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo))) + return -EINVAL; + + cmsgs->authinfo = CMSG_DATA(cmsg); + break; + case SCTP_DSTADDRV4: + case SCTP_DSTADDRV6: + /* SCTP Socket API Extension + * 5.3.9/10 SCTP Destination IPv4/6 Address Structure (SCTP_DSTADDRV4/6) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). 
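
Per-message PR-SCTP and AUTH parameters can now ride along as ancillary data instead of a full SCTP_SNDRCVINFO, as the cmsg parsing further down shows. A hedged sketch of one sendmsg() carrying SCTP_PRINFO, assuming the struct layout and cmsg types this series adds to the uapi; error handling elided:

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/sctp.h>

/* Send one message that may be abandoned 3000 ms after first transmit,
 * mirroring how sctp_sendmsg_parse() above folds pr_policy/pr_value
 * into sinfo_flags and sinfo_timetolive.
 */
static ssize_t send_pr_ttl(int fd, const void *data, size_t len)
{
	char cbuf[CMSG_SPACE(sizeof(struct sctp_prinfo))] = { 0 };
	struct iovec iov = { .iov_base = (void *)data, .iov_len = len };
	struct msghdr mh = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&mh);
	struct sctp_prinfo pri = {
		.pr_policy = SCTP_PR_SCTP_TTL,	/* timed reliability */
		.pr_value = 3000,		/* milliseconds */
	};

	cm->cmsg_level = IPPROTO_SCTP;
	cm->cmsg_type = SCTP_PRINFO;
	cm->cmsg_len = CMSG_LEN(sizeof(pri));
	memcpy(CMSG_DATA(cm), &pri, sizeof(pri));

	return sendmsg(fd, &mh, 0);
}
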
+ * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_DSTADDRV4 struct in_addr + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_DSTADDRV6 struct in6_addr + */ + cmsgs->addrs_msg = my_msg; + break; default: return -EINVAL; } @@ -7972,6 +8166,26 @@ static void sctp_wfree(struct sk_buff *skb) sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); + if (chunk->shkey) { + struct sctp_shared_key *shkey = chunk->shkey; + + /* refcnt == 2 and !list_empty mean after this release, it's + * not being used anywhere, and it's time to notify userland + * that this shkey can be freed if it's been deactivated. + */ + if (shkey->deactivated && !list_empty(&shkey->key_list) && + refcount_read(&shkey->refcnt) == 2) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id, + SCTP_AUTH_FREE_KEY, + GFP_KERNEL); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + sctp_auth_shkey_release(chunk->shkey); + } + sock_wfree(skb); sctp_wake_up_waiters(sk, asoc); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 26684e086750..5f8046c62d90 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -7,12 +7,11 @@ * applicable with RoCE-cards only * * Initial restrictions: - * - IPv6 support postponed * - support for alternate links postponed * - partial support for non-blocking sockets only * - support for urgent data postponed * - * Copyright IBM Corp. 2016 + * Copyright IBM Corp. 2016, 2018 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> * based on prototype from Frank Blaschka @@ -64,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock), }; +static struct smc_hashinfo smc_v6_hashinfo = { + .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock), +}; + int smc_hash_sk(struct sock *sk) { struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; @@ -103,6 +106,18 @@ struct proto smc_proto = { }; EXPORT_SYMBOL_GPL(smc_proto); +struct proto smc_proto6 = { + .name = "SMC6", + .owner = THIS_MODULE, + .keepalive = smc_set_keepalive, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v6_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; +EXPORT_SYMBOL_GPL(smc_proto6); + static int smc_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -159,19 +174,22 @@ static void smc_destruct(struct sock *sk) sk_refcnt_debug_dec(sk); } -static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) +static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, + int protocol) { struct smc_sock *smc; + struct proto *prot; struct sock *sk; - sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0); + prot = (protocol == SMCPROTO_SMC6) ? 
&smc_proto6 : &smc_proto; + sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0); if (!sk) return NULL; sock_init_data(sock, sk); /* sets sk_refcnt to 1 */ sk->sk_state = SMC_INIT; sk->sk_destruct = smc_destruct; - sk->sk_protocol = SMCPROTO_SMC; + sk->sk_protocol = protocol; smc = smc_sk(sk); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_LIST_HEAD(&smc->accept_q); @@ -198,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr, goto out; rc = -EAFNOSUPPORT; + if (addr->sin_family != AF_INET && + addr->sin_family != AF_INET6 && + addr->sin_family != AF_UNSPEC) + goto out; /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */ - if ((addr->sin_family != AF_INET) && - ((addr->sin_family != AF_UNSPEC) || - (addr->sin_addr.s_addr != htonl(INADDR_ANY)))) + if (addr->sin_family == AF_UNSPEC && + addr->sin_addr.s_addr != htonl(INADDR_ANY)) goto out; lock_sock(sk); @@ -529,7 +550,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, /* separate smc parameter checking to be safe */ if (alen < sizeof(addr->sa_family)) goto out_err; - if (addr->sa_family != AF_INET) + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) goto out_err; lock_sock(sk); @@ -571,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) int rc; release_sock(lsk); - new_sk = smc_sock_alloc(sock_net(lsk), NULL); + new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol); if (!new_sk) { rc = -ENOMEM; lsk->sk_err = ENOMEM; @@ -767,8 +788,6 @@ static void smc_listen_work(struct work_struct *work) struct smc_link *link; int reason_code = 0; int rc = 0; - __be32 subnet; - u8 prefix_len; u8 ibport; /* check if peer is smc capable */ @@ -803,17 +822,11 @@ static void smc_listen_work(struct work_struct *work) goto decline_rdma; } - /* determine subnet and mask from internal TCP socket */ - rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len); - if (rc) { - reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ - goto decline_rdma; - } - pclc = (struct smc_clc_msg_proposal *)&buf; pclc_prfx = smc_clc_proposal_get_prefix(pclc); - if (pclc_prfx->outgoing_subnet != subnet || - pclc_prfx->prefix_len != prefix_len) { + + rc = smc_clc_prfx_match(newclcsock, pclc_prfx); + if (rc) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } @@ -970,10 +983,6 @@ out: lsmc->clcsock = NULL; } release_sock(lsk); - /* no more listening, wake up smc_close_wait_listen_clcsock and - * accept - */ - lsk->sk_state_change(lsk); sock_put(&lsmc->sk); /* sock_hold in smc_listen */ } @@ -1375,6 +1384,7 @@ static const struct proto_ops smc_sock_ops = { static int smc_create(struct net *net, struct socket *sock, int protocol, int kern) { + int family = (protocol == SMCPROTO_SMC6) ? 
PF_INET6 : PF_INET; struct smc_sock *smc; struct sock *sk; int rc; @@ -1384,22 +1394,24 @@ static int smc_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -EPROTONOSUPPORT; - if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP)) + if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6) goto out; rc = -ENOBUFS; sock->ops = &smc_sock_ops; - sk = smc_sock_alloc(net, sock); + sk = smc_sock_alloc(net, sock, protocol); if (!sk) goto out; /* create internal TCP socket for CLC handshake and fallback */ smc = smc_sk(sk); smc->use_fallback = false; /* assume rdma capability first */ - rc = sock_create_kern(net, PF_INET, SOCK_STREAM, - IPPROTO_TCP, &smc->clcsock); - if (rc) + rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, + &smc->clcsock); + if (rc) { sk_common_release(sk); + goto out; + } smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); @@ -1435,16 +1447,23 @@ static int __init smc_init(void) rc = proto_register(&smc_proto, 1); if (rc) { - pr_err("%s: proto_register fails with %d\n", __func__, rc); + pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc); goto out_pnet; } + rc = proto_register(&smc_proto6, 1); + if (rc) { + pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc); + goto out_proto; + } + rc = sock_register(&smc_sock_family_ops); if (rc) { pr_err("%s: sock_register fails with %d\n", __func__, rc); - goto out_proto; + goto out_proto6; } INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); + INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); rc = smc_ib_register_client(); if (rc) { @@ -1457,6 +1476,8 @@ static int __init smc_init(void) out_sock: sock_unregister(PF_SMC); +out_proto6: + proto_unregister(&smc_proto6); out_proto: proto_unregister(&smc_proto); out_pnet: @@ -1475,11 +1496,13 @@ static void __exit smc_exit(void) spin_unlock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { list_del_init(&lgr->list); + cancel_delayed_work_sync(&lgr->free_work); smc_lgr_free(lgr); /* free link group */ } static_branch_disable(&tcp_have_smc); smc_ib_unregister_client(); sock_unregister(PF_SMC); + proto_unregister(&smc_proto6); proto_unregister(&smc_proto); smc_pnet_exit(); } diff --git a/net/smc/smc.h b/net/smc/smc.h index 268cdf11533c..e4829a2f46ba 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -18,11 +18,13 @@ #include "smc_ib.h" -#define SMCPROTO_SMC 0 /* SMC protocol */ +#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ +#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ #define SMC_MAX_PORTS 2 /* Max # of ports */ extern struct proto smc_proto; +extern struct proto smc_proto6; #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 3cd086e5bd28..b42395d24cba 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -269,7 +269,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved)) return; /* short message */ - if (cdc->len != sizeof(*cdc)) + if (cdc->len != SMC_WR_TX_SIZE) return; /* invalid message */ smc_cdc_msg_recv(cdc, link, wc->wr_id); } diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 874c5a75d6dd..64fbc3230e6c 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -5,7 +5,7 @@ * CLC (connection layer control) handshake over initial TCP socket to * prepare for RDMA traffic * - * Copyright IBM Corp. 2016 + * Copyright IBM Corp. 
2016, 2018 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ @@ -15,6 +15,7 @@ #include <linux/if_ether.h> #include <linux/sched/signal.h> +#include <net/addrconf.h> #include <net/sock.h> #include <net/tcp.h> @@ -74,15 +75,67 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) return true; } -/* determine subnet and mask of internal TCP socket */ -int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, - __be32 *subnet, u8 *prefix_len) +/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */ +static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4, + struct smc_clc_msg_proposal_prefix *prop) +{ + struct in_device *in_dev = __in_dev_get_rcu(dst->dev); + + if (!in_dev) + return -ENODEV; + for_ifa(in_dev) { + if (!inet_ifa_match(ipv4, ifa)) + continue; + prop->prefix_len = inet_mask_len(ifa->ifa_mask); + prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask; + /* prop->ipv6_prefixes_cnt = 0; already done by memset before */ + return 0; + } endfor_ifa(in_dev); + return -ENOENT; +} + +/* fill CLC proposal msg with ipv6 prefixes from device */ +static int smc_clc_prfx_set6_rcu(struct dst_entry *dst, + struct smc_clc_msg_proposal_prefix *prop, + struct smc_clc_ipv6_prefix *ipv6_prfx) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev = __in6_dev_get(dst->dev); + struct inet6_ifaddr *ifa; + int cnt = 0; + + if (!in6_dev) + return -ENODEV; + /* use a maximum of 8 IPv6 prefixes from device */ + list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { + if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) + continue; + ipv6_addr_prefix(&ipv6_prfx[cnt].prefix, + &ifa->addr, ifa->prefix_len); + ipv6_prfx[cnt].prefix_len = ifa->prefix_len; + cnt++; + if (cnt == SMC_CLC_MAX_V6_PREFIX) + break; + } + prop->ipv6_prefixes_cnt = cnt; + if (cnt) + return 0; +#endif + return -ENOENT; +} + +/* retrieve and set prefixes in CLC proposal msg */ +static int smc_clc_prfx_set(struct socket *clcsock, + struct smc_clc_msg_proposal_prefix *prop, + struct smc_clc_ipv6_prefix *ipv6_prfx) { struct dst_entry *dst = sk_dst_get(clcsock->sk); - struct in_device *in_dev; - struct sockaddr_in addr; + struct sockaddr_storage addrs; + struct sockaddr_in6 *addr6; + struct sockaddr_in *addr; int rc = -ENOENT; + memset(prop, 0, sizeof(*prop)); if (!dst) { rc = -ENOTCONN; goto out; @@ -91,22 +144,97 @@ int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, rc = -ENODEV; goto out_rel; } - /* get address to which the internal TCP socket is bound */ - kernel_getsockname(clcsock, (struct sockaddr *)&addr); - /* analyze IPv4 specific data of net_device belonging to TCP socket */ + kernel_getsockname(clcsock, (struct sockaddr *)&addrs); + /* analyze IP specific data of net_device belonging to TCP socket */ + addr6 = (struct sockaddr_in6 *)&addrs; rcu_read_lock(); - in_dev = __in_dev_get_rcu(dst->dev); + if (addrs.ss_family == PF_INET) { + /* IPv4 */ + addr = (struct sockaddr_in *)&addrs; + rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop); + } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) { + /* mapped IPv4 address - peer is IPv4 only */ + rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3], + prop); + } else { + /* IPv6 */ + rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx); + } + rcu_read_unlock(); +out_rel: + dst_release(dst); +out: + return rc; +} + +/* match ipv4 addrs of dev against addr in CLC proposal */ +static int smc_clc_prfx_match4_rcu(struct net_device *dev, + struct smc_clc_msg_proposal_prefix *prop) +{ + struct in_device *in_dev = 
__in_dev_get_rcu(dev); + + if (!in_dev) + return -ENODEV; for_ifa(in_dev) { - if (!inet_ifa_match(addr.sin_addr.s_addr, ifa)) - continue; - *prefix_len = inet_mask_len(ifa->ifa_mask); - *subnet = ifa->ifa_address & ifa->ifa_mask; - rc = 0; - break; + if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) && + inet_ifa_match(prop->outgoing_subnet, ifa)) + return 0; } endfor_ifa(in_dev); - rcu_read_unlock(); + return -ENOENT; +} + +/* match ipv6 addrs of dev against addrs in CLC proposal */ +static int smc_clc_prfx_match6_rcu(struct net_device *dev, + struct smc_clc_msg_proposal_prefix *prop) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev = __in6_dev_get(dev); + struct smc_clc_ipv6_prefix *ipv6_prfx; + struct inet6_ifaddr *ifa; + int i, max; + + if (!in6_dev) + return -ENODEV; + /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */ + ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop)); + max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX); + list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { + if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) + continue; + for (i = 0; i < max; i++) { + if (ifa->prefix_len == ipv6_prfx[i].prefix_len && + ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix, + ifa->prefix_len)) + return 0; + } + } +#endif + return -ENOENT; +} + +/* check if proposed prefixes match one of our device prefixes */ +int smc_clc_prfx_match(struct socket *clcsock, + struct smc_clc_msg_proposal_prefix *prop) +{ + struct dst_entry *dst = sk_dst_get(clcsock->sk); + int rc; + + if (!dst) { + rc = -ENOTCONN; + goto out; + } + if (!dst->dev) { + rc = -ENODEV; + goto out_rel; + } + rcu_read_lock(); + if (!prop->ipv6_prefixes_cnt) + rc = smc_clc_prfx_match4_rcu(dst->dev, prop); + else + rc = smc_clc_prfx_match6_rcu(dst->dev, prop); + rcu_read_unlock(); out_rel: dst_release(dst); out: @@ -232,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport) { + struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_msg_proposal pclc; struct smc_clc_msg_trail trl; + int len, i, plen, rc; int reason_code = 0; - struct kvec vec[3]; + struct kvec vec[4]; struct msghdr msg; - int len, plen, rc; + + /* retrieve ip prefixes for CLC proposal msg */ + rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx); + if (rc) + return SMC_CLC_DECL_CNFERR; /* configuration error */ /* send SMC Proposal CLC message */ - plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); + plen = sizeof(pclc) + sizeof(pclc_prfx) + + (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) + + sizeof(trl); memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; @@ -252,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc, memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); pclc.iparea_offset = htons(0); - memset(&pclc_prfx, 0, sizeof(pclc_prfx)); - /* determine subnet and mask from internal TCP socket */ - rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, - &pclc_prfx.prefix_len); - if (rc) - return SMC_CLC_DECL_CNFERR; /* configuration error */ - pclc_prfx.ipv6_prefixes_cnt = 0; memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); - vec[0].iov_base = &pclc; - vec[0].iov_len = sizeof(pclc); - vec[1].iov_base = &pclc_prfx; - vec[1].iov_len = sizeof(pclc_prfx); - vec[2].iov_base = &trl; - vec[2].iov_len = 
sizeof(trl); + i = 0; + vec[i].iov_base = &pclc; + vec[i++].iov_len = sizeof(pclc); + vec[i].iov_base = &pclc_prfx; + vec[i++].iov_len = sizeof(pclc_prfx); + if (pclc_prfx.ipv6_prefixes_cnt > 0) { + vec[i].iov_base = &ipv6_prfx[0]; + vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt * + sizeof(ipv6_prfx[0]); + } + vec[i].iov_base = &trl; + vec[i++].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ - len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); + len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); if (len < sizeof(pclc)) { if (len >= 0) { reason_code = -ENETUNREACH; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 20e048beac30..63bf1dc2c1f9 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -60,10 +60,15 @@ struct smc_clc_msg_local { /* header2 of clc messages */ u8 mac[6]; /* mac of ib_device port */ }; +#define SMC_CLC_MAX_V6_PREFIX 8 + +/* Struct would be 4 byte aligned, but it is used in an array that is sent + * to peers and must conform to RFC7609, hence we need to use packed here. + */ struct smc_clc_ipv6_prefix { - u8 prefix[4]; + struct in6_addr prefix; u8 prefix_len; -} __packed; +} __packed; /* format defined in RFC7609 */ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ __be32 outgoing_subnet; /* subnet mask */ @@ -79,9 +84,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ } __aligned(4); #define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28 -#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix)) +#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \ + sizeof(struct smc_clc_ipv6_prefix)) #define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \ SMC_CLC_PROPOSAL_MAX_OFFSET + \ + sizeof(struct smc_clc_msg_proposal_prefix) + \ SMC_CLC_PROPOSAL_MAX_PREFIX + \ sizeof(struct smc_clc_msg_trail)) @@ -122,8 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); } -int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet, - u8 *prefix_len); +int smc_clc_prfx_match(struct socket *clcsock, + struct smc_clc_msg_proposal_prefix *prop); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index e339c0186dcf..fa41d9881741 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -30,27 +30,6 @@ static void smc_close_cleanup_listen(struct sock *parent) smc_close_non_accepted(sk); } -static void smc_close_wait_listen_clcsock(struct smc_sock *smc) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct sock *sk = &smc->sk; - signed long timeout; - - timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME; - add_wait_queue(sk_sleep(sk), &wait); - do { - release_sock(sk); - if (smc->clcsock) - timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE, - timeout); - sched_annotate_sleep(); - lock_sock(sk); - if (!smc->clcsock) - break; - } while (timeout); - remove_wait_queue(sk_sleep(sk), &wait); -} - /* wait for sndbuf data being transmitted */ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) { @@ -204,9 +183,11 @@ again: rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); /* wake up kernel_accept of smc_tcp_listen_worker */ smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); - smc_close_wait_listen_clcsock(smc); } smc_close_cleanup_listen(sk); + release_sock(sk); + flush_work(&smc->tcp_listen_work); + 
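
With the second proto registered by the AF_SMC changes above, an SMC socket can be opened over IPv6 as well as IPv4; the protocol argument now selects the family of the internal TCP clcsock. A small sketch, assuming the AF_SMC value from linux/socket.h and the SMCPROTO_* constants from this series:

#include <sys/socket.h>

#ifndef AF_SMC
#define AF_SMC 43		/* as defined in linux/socket.h */
#endif
#define SMCPROTO_SMC  0		/* SMC over IPv4 */
#define SMCPROTO_SMC6 1		/* SMC over IPv6 */

int open_smc_socket(int use_ipv6)
{
	/* smc_create() maps SMCPROTO_SMC6 to a PF_INET6 TCP clcsock
	 * for the CLC handshake and fallback.
	 */
	return socket(AF_SMC, SOCK_STREAM,
		      use_ipv6 ? SMCPROTO_SMC6 : SMCPROTO_SMC);
}
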
lock_sock(sk); break; case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 702ce5f85e97..f44f6803f7ff 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -32,6 +32,17 @@ static u32 smc_lgr_num; /* unique link group number */ +static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) +{ + /* client link group creation always follows the server link group + * creation. For client use a somewhat higher removal delay time, + * otherwise there is a risk of out-of-sync link groups. + */ + mod_delayed_work(system_wq, &lgr->free_work, + lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : + SMC_LGR_FREE_DELAY_SERV); +} + /* Register connection's alert token in our lookup structure. * To use rbtrees we have to implement our own insert core. * Requires @conns_lock @@ -111,13 +122,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) write_unlock_bh(&lgr->conns_lock); if (!reduced || lgr->conns_num) return; - /* client link group creation always follows the server link group - * creation. For client use a somewhat higher removal delay time, - * otherwise there is a risk of out-of-sync link groups. - */ - mod_delayed_work(system_wq, &lgr->free_work, - lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : - SMC_LGR_FREE_DELAY_SERV); + smc_lgr_schedule_free_work(lgr); } static void smc_lgr_free_work(struct work_struct *work) @@ -140,7 +145,8 @@ static void smc_lgr_free_work(struct work_struct *work) list_del_init(&lgr->list); /* remove from smc_lgr_list */ free: spin_unlock_bh(&smc_lgr_list.lock); - smc_lgr_free(lgr); + if (!delayed_work_pending(&lgr->free_work)) + smc_lgr_free(lgr); } /* create a new SMC link group */ @@ -177,6 +183,7 @@ static int smc_lgr_create(struct smc_sock *smc, lnk = &lgr->lnk[SMC_SINGLE_LINK]; /* initialize link */ lnk->state = SMC_LNK_ACTIVATING; + lnk->link_id = SMC_SINGLE_LINK; lnk->smcibdev = smcibdev; lnk->ibport = ibport; lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; @@ -342,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) } write_unlock_bh(&lgr->conns_lock); wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); + smc_lgr_schedule_free_work(lgr); } /* Determine vlan of internal TCP socket. @@ -468,7 +476,7 @@ create: rc = smc_link_determine_gid(conn->lgr); } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; - conn->local_tx_ctrl.len = sizeof(struct smc_cdc_msg); + conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; #ifndef KERNEL_HAS_ATOMIC64 spin_lock_init(&conn->acurs_lock); #endif diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 2a8957bd6d38..26df554f7588 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -23,6 +23,8 @@ #include "smc_wr.h" #include "smc.h" +#define SMC_MAX_CQE 32766 /* max. # of completion queue elements */ + #define SMC_QP_MIN_RNR_TIMER 5 #define SMC_QP_TIMEOUT 15 /* 4096 * 2 ** timeout usec */ #define SMC_QP_RETRY_CNT 7 /* 7: infinite */ @@ -438,9 +440,15 @@ out: long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) { struct ib_cq_init_attr cqattr = { - .cqe = SMC_WR_MAX_CQE, .comp_vector = 0 }; + .cqe = SMC_MAX_CQE, .comp_vector = 0 }; + int cqe_size_order, smc_order; long rc; + /* the calculated number of cq entries fits to mlx5 cq allocation */ + cqe_size_order = cache_line_size() == 128 ? 
7 : 6; + smc_order = MAX_ORDER - cqe_size_order - 1; + if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE) + cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2; smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev, smc_wr_tx_cq_handler, NULL, smcibdev, &cqattr); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 54e8d6dc9201..ea4b21981b4b 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -206,7 +206,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], memcpy(confllc->sender_mac, mac, ETH_ALEN); memcpy(confllc->sender_gid, gid, SMC_GID_SIZE); hton24(confllc->sender_qp_num, link->roce_qp->qp_num); - /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */ + confllc->link_num = link->link_id; memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */ /* send llc message */ diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index ef0c3494c9cb..210bec3c3ebe 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -19,7 +19,6 @@ #include "smc.h" #include "smc_core.h" -#define SMC_WR_MAX_CQE 32768 /* max. # of completion queue elements */ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) diff --git a/net/socket.c b/net/socket.c index d9a1ac233b35..3d1948d27a25 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2590,6 +2590,11 @@ void sock_unregister(int family) } EXPORT_SYMBOL(sock_unregister); +bool sock_is_registered(int family) +{ + return family < NPROTO && rcu_access_pointer(net_families[family]); +} + static int __init sock_init(void) { int err; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 26531193fce4..5089dbb96d58 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1375,7 +1375,7 @@ static int create_use_gss_proxy_proc_entry(struct net *net) struct proc_dir_entry **p = &sn->use_gssp_proc; sn->use_gss_proxy = -1; - *p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR, + *p = proc_create_data("use-gss-proxy", S_IFREG | 0600, sn->proc_net_rpc, &use_gss_proxy_ops, net); if (!*p) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8a7e1c774f9c..c536cc24b3d1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1621,20 +1621,20 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) if (cd->procfs == NULL) goto out_nomem; - p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, + p = proc_create_data("flush", S_IFREG | 0600, cd->procfs, &cache_flush_operations_procfs, cd); if (p == NULL) goto out_nomem; if (cd->cache_request || cd->cache_parse) { - p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->procfs, &cache_file_operations_procfs, cd); + p = proc_create_data("channel", S_IFREG | 0600, cd->procfs, + &cache_file_operations_procfs, cd); if (p == NULL) goto out_nomem; } if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR, - cd->procfs, &content_file_operations_procfs, cd); + p = proc_create_data("content", S_IFREG | 0400, cd->procfs, + &content_file_operations_procfs, cd); if (p == NULL) goto out_nomem; } diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index e980d2a493de..45a033329cd4 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -139,7 +139,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt) return; /* make tasks file */ - if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs, + if (!debugfs_create_file("tasks", 
S_IFREG | 0400, clnt->cl_debugfs, clnt, &tasks_fops)) goto out_err; @@ -241,7 +241,7 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt) return; /* make tasks file */ - if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs, + if (!debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs, xprt, &xprt_info_fops)) { debugfs_remove_recursive(xprt->debugfs); xprt->debugfs = NULL; @@ -317,7 +317,7 @@ inject_fault_dir(struct dentry *topdir) if (!faultdir) return NULL; - if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir, + if (!debugfs_create_file("disconnect", S_IFREG | 0400, faultdir, NULL, &fault_disconnect_fops)) return NULL; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index fc97fc3ed637..0f08934b2cea 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -820,13 +820,13 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, { struct dentry *dentry; struct inode *dir = d_inode(parent); - umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; + umode_t umode = S_IFIFO | 0600; int err; if (pipe->ops->upcall == NULL) - umode &= ~S_IRUGO; + umode &= ~0444; if (pipe->ops->downcall == NULL) - umode &= ~S_IWUGO; + umode &= ~0222; inode_lock_nested(dir, I_MUTEX_PARENT); dentry = __rpc_lookup_create_exclusive(parent, name); @@ -1035,7 +1035,7 @@ static const struct rpc_filelist authfiles[] = { [RPCAUTH_info] = { .name = "info", .i_fop = &rpc_info_operations, - .mode = S_IFREG | S_IRUSR, + .mode = S_IFREG | 0400, }, }; @@ -1068,8 +1068,8 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry, { struct dentry *ret; - ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, - rpc_clntdir_populate, rpc_client); + ret = rpc_mkdir_populate(dentry, name, 0555, NULL, + rpc_clntdir_populate, rpc_client); if (!IS_ERR(ret)) { rpc_client->cl_pipedir_objects.pdh_dentry = ret; rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects); @@ -1096,17 +1096,17 @@ static const struct rpc_filelist cache_pipefs_files[3] = { [0] = { .name = "channel", .i_fop = &cache_file_operations_pipefs, - .mode = S_IFREG|S_IRUSR|S_IWUSR, + .mode = S_IFREG | 0600, }, [1] = { .name = "content", .i_fop = &content_file_operations_pipefs, - .mode = S_IFREG|S_IRUSR, + .mode = S_IFREG | 0400, }, [2] = { .name = "flush", .i_fop = &cache_flush_operations_pipefs, - .mode = S_IFREG|S_IRUSR|S_IWUSR, + .mode = S_IFREG | 0600, }, }; @@ -1164,39 +1164,39 @@ enum { static const struct rpc_filelist files[] = { [RPCAUTH_lockd] = { .name = "lockd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, [RPCAUTH_mount] = { .name = "mount", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, [RPCAUTH_nfs] = { .name = "nfs", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, [RPCAUTH_portmap] = { .name = "portmap", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, [RPCAUTH_statd] = { .name = "statd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, [RPCAUTH_nfsd4_cb] = { .name = "nfsd4_cb", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, [RPCAUTH_cache] = { .name = "cache", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, [RPCAUTH_nfsd] = { .name = "nfsd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, [RPCAUTH_gssd] = { .name = "gssd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, }; @@ -1261,7 +1261,7 @@ EXPORT_SYMBOL_GPL(rpc_put_sb_net); static const struct rpc_filelist gssd_dummy_clnt_dir[] = { [0] = { .name = 
"clntXX", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, + .mode = S_IFDIR | 0555, }, }; @@ -1310,7 +1310,7 @@ static const struct rpc_filelist gssd_dummy_info_file[] = { [0] = { .name = "info", .i_fop = &rpc_dummy_info_operations, - .mode = S_IFREG | S_IRUSR, + .mode = S_IFREG | 0400, }, }; @@ -1397,7 +1397,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) sb->s_d_op = &simple_dentry_operations; sb->s_time_gran = 1; - inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); + inode = rpc_get_inode(sb, S_IFDIR | 0555); sb->s_root = root = d_make_root(inode); if (!root) return -ENOMEM; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index f424539829b7..9aed6fe1bf1a 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -89,7 +89,6 @@ static void __net_exit sysctl_net_exit(struct net *net) static struct pernet_operations sysctl_pernet_ops = { .init = sysctl_net_init, .exit = sysctl_net_exit, - .async = true, }; static struct ctl_table_header *net_header; diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index c25a3a149dc4..e450212121d2 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -34,3 +34,11 @@ config TIPC_MEDIA_UDP Saying Y here will enable support for running TIPC over IP/UDP bool default y + +config TIPC_DIAG + tristate "TIPC: socket monitoring interface" + depends on TIPC + default y + ---help--- + Support for TIPC socket monitoring interface used by ss tool. + If unsure, say Y. diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 1edb7192aa2f..aca168f2abb1 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -14,3 +14,8 @@ tipc-y += addr.o bcast.o bearer.o \ tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o + + +obj-$(CONFIG_TIPC_DIAG) += diag.o + +tipc_diag-y := diag.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 48fd3b5a73fb..b88d48d00913 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -1,7 +1,7 @@ /* * net/tipc/addr.c: TIPC address utility routines * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -34,113 +34,90 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <linux/kernel.h> #include "addr.h" #include "core.h" -/** - * in_own_cluster - test for cluster inclusion; <0.0.0> always matches - */ -int in_own_cluster(struct net *net, u32 addr) -{ - return in_own_cluster_exact(net, addr) || !addr; -} - -int in_own_cluster_exact(struct net *net, u32 addr) +bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - - return !((addr ^ tn->own_addr) >> 12); + if (!domain || (domain == addr)) + return true; + if (!legacy_format) + return false; + if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */ + return true; + if (domain == (addr & TIPC_ZONE_CLUSTER_MASK)) /* domain <Z.C.0> */ + return true; + if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */ + return true; + return false; } -/** - * in_own_node - test for node inclusion; <0.0.0> always matches - */ -int in_own_node(struct net *net, u32 addr) +void tipc_set_node_id(struct net *net, u8 *id) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); + u32 *tmp = (u32 *)id; - return (addr == tn->own_addr) || !addr; + memcpy(tn->node_id, id, NODE_ID_LEN); + tipc_nodeid2string(tn->node_id_string, id); + tn->trial_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3]; + pr_info("Own node identity %s, cluster identity %u\n", + tipc_own_id_string(net), tn->net_id); } -/** - * addr_domain - convert 2-bit scope value to equivalent message lookup domain - * - * Needed when address of a named message must be looked up a second time - * after a network hop. - */ -u32 addr_domain(struct net *net, u32 sc) +void tipc_set_node_addr(struct net *net, u32 addr) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); + u8 node_id[NODE_ID_LEN] = {0,}; - if (likely(sc == TIPC_NODE_SCOPE)) - return tn->own_addr; - if (sc == TIPC_CLUSTER_SCOPE) - return tipc_cluster_mask(tn->own_addr); - return tipc_zone_mask(tn->own_addr); + tn->node_addr = addr; + if (!tipc_own_id(net)) { + sprintf(node_id, "%x", addr); + tipc_set_node_id(net, node_id); + } + tn->trial_addr = addr; + pr_info("32-bit node address hash set to %x\n", addr); } -/** - * tipc_addr_domain_valid - validates a network domain address - * - * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>, - * where Z, C, and N are non-zero. - * - * Returns 1 if domain address is valid, otherwise 0 - */ -int tipc_addr_domain_valid(u32 addr) +char *tipc_nodeid2string(char *str, u8 *id) { - u32 n = tipc_node(addr); - u32 c = tipc_cluster(addr); - u32 z = tipc_zone(addr); - - if (n && (!z || !c)) - return 0; - if (c && !z) - return 0; - return 1; -} + int i; + u8 c; -/** - * tipc_addr_node_valid - validates a proposed network address for this node - * - * Accepts <Z.C.N>, where Z, C, and N are non-zero. - * - * Returns 1 if address can be used, otherwise 0 - */ -int tipc_addr_node_valid(u32 addr) -{ - return tipc_addr_domain_valid(addr) && tipc_node(addr); -} + /* Already a string ? 
*/ + for (i = 0; i < NODE_ID_LEN; i++) { + c = id[i]; + if (c >= '0' && c <= '9') + continue; + if (c >= 'A' && c <= 'Z') + continue; + if (c >= 'a' && c <= 'z') + continue; + if (c == '.') + continue; + if (c == ':') + continue; + if (c == '_') + continue; + if (c == '-') + continue; + if (c == '@') + continue; + if (c != 0) + break; + } + if (i == NODE_ID_LEN) { + memcpy(str, id, NODE_ID_LEN); + str[NODE_ID_LEN] = 0; + return str; + } -int tipc_in_scope(u32 domain, u32 addr) -{ - if (!domain || (domain == addr)) - return 1; - if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */ - return 1; - if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */ - return 1; - return 0; -} + /* Translate to hex string */ + for (i = 0; i < NODE_ID_LEN; i++) + sprintf(&str[2 * i], "%02x", id[i]); -/** - * tipc_addr_scope - convert message lookup domain to a 2-bit scope value - */ -int tipc_addr_scope(u32 domain) -{ - if (likely(!domain)) - return TIPC_ZONE_SCOPE; - if (tipc_node(domain)) - return TIPC_NODE_SCOPE; - if (tipc_cluster(domain)) - return TIPC_CLUSTER_SCOPE; - return TIPC_ZONE_SCOPE; -} + /* Strip off trailing zeroes */ + for (i = NODE_ID_STR_LEN - 2; str[i] == '0'; i--) + str[i] = 0; -char *tipc_addr_string_fill(char *string, u32 addr) -{ - snprintf(string, 16, "<%u.%u.%u>", - tipc_zone(addr), tipc_cluster(addr), tipc_node(addr)); - return string; + return str; } diff --git a/net/tipc/addr.h b/net/tipc/addr.h index bebb347803ce..31bee0ea7b3e 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -1,7 +1,7 @@ /* * net/tipc/addr.h: Include file for TIPC address utility routines * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2018, Ericsson AB * Copyright (c) 2004-2005, Wind River Systems * All rights reserved. * @@ -45,14 +45,21 @@ static inline u32 tipc_own_addr(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + return tipc_net(net)->node_addr; +} + +static inline u8 *tipc_own_id(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); - return tn->own_addr; + if (!strlen(tn->node_id_string)) + return NULL; + return tn->node_id; } -static inline u32 tipc_zone_mask(u32 addr) +static inline char *tipc_own_id_string(struct net *net) { - return addr & TIPC_ZONE_MASK; + return tipc_net(net)->node_id_string; } static inline u32 tipc_cluster_mask(u32 addr) @@ -60,15 +67,25 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_ZONE_CLUSTER_MASK; } -u32 tipc_own_addr(struct net *net); -int in_own_cluster(struct net *net, u32 addr); -int in_own_cluster_exact(struct net *net, u32 addr); -int in_own_node(struct net *net, u32 addr); -u32 addr_domain(struct net *net, u32 sc); -int tipc_addr_domain_valid(u32); -int tipc_addr_node_valid(u32 addr); -int tipc_in_scope(u32 domain, u32 addr); -int tipc_addr_scope(u32 domain); -char *tipc_addr_string_fill(char *string, u32 addr); +static inline int tipc_node2scope(u32 node) +{ + return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE; +} + +static inline int tipc_scope2node(struct net *net, int sc) +{ + return sc != TIPC_NODE_SCOPE ? 
0 : tipc_own_addr(net); +} + +static inline int in_own_node(struct net *net, u32 addr) +{ + return addr == tipc_own_addr(net) || !addr; +} + +bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr); +void tipc_set_node_id(struct net *net, u8 *id); +void tipc_set_node_addr(struct net *net, u32 addr); +char *tipc_nodeid2string(char *str, u8 *id); +u32 tipc_node_id2hash(u8 *id128); #endif diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 37892b3909af..f3711176be45 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -574,5 +574,5 @@ void tipc_nlist_purge(struct tipc_nlist *nl) { tipc_dest_list_purge(&nl->list); nl->remote = 0; - nl->local = 0; + nl->local = false; } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index f3d2e83313e1..f7d47c89d658 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -210,7 +210,7 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) rcu_read_lock(); b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b) - tipc_disc_add_dest(b->link_req); + tipc_disc_add_dest(b->disc); rcu_read_unlock(); } @@ -222,7 +222,7 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) rcu_read_lock(); b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b) - tipc_disc_remove_dest(b->link_req); + tipc_disc_remove_dest(b->disc); rcu_read_unlock(); } @@ -230,88 +230,67 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) * tipc_enable_bearer - enable bearer with the given name */ static int tipc_enable_bearer(struct net *net, const char *name, - u32 disc_domain, u32 priority, + u32 disc_domain, u32 prio, struct nlattr *attr[]) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer_names b_names; + int with_this_prio = 1; struct tipc_bearer *b; struct tipc_media *m; - struct tipc_bearer_names b_names; struct sk_buff *skb; - char addr_string[16]; - u32 bearer_id; - u32 with_this_prio; - u32 i; + int bearer_id = 0; int res = -EINVAL; + char *errstr = ""; - if (!tn->own_addr) { - pr_warn("Bearer <%s> rejected, not supported in standalone mode\n", - name); - return -ENOPROTOOPT; - } if (!bearer_name_validate(name, &b_names)) { - pr_warn("Bearer <%s> rejected, illegal name\n", name); - return -EINVAL; - } - if (tipc_addr_domain_valid(disc_domain) && - (disc_domain != tn->own_addr)) { - if (tipc_in_scope(disc_domain, tn->own_addr)) { - disc_domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; - res = 0; /* accept any node in own cluster */ - } else if (in_own_cluster_exact(net, disc_domain)) - res = 0; /* accept specified node in own cluster */ + errstr = "illegal name"; + goto rejected; } - if (res) { - pr_warn("Bearer <%s> rejected, illegal discovery domain\n", - name); - return -EINVAL; - } - if ((priority > TIPC_MAX_LINK_PRI) && - (priority != TIPC_MEDIA_LINK_PRI)) { - pr_warn("Bearer <%s> rejected, illegal priority\n", name); - return -EINVAL; + + if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { + errstr = "illegal priority"; + goto rejected; } m = tipc_media_find(b_names.media_name); if (!m) { - pr_warn("Bearer <%s> rejected, media <%s> not registered\n", - name, b_names.media_name); - return -EINVAL; + errstr = "media not registered"; + goto rejected; } - if (priority == TIPC_MEDIA_LINK_PRI) - priority = m->priority; + if (prio == TIPC_MEDIA_LINK_PRI) + prio = m->priority; -restart: - bearer_id = MAX_BEARERS; - with_this_prio = 1; - for (i = MAX_BEARERS; i-- != 0; ) { - b = rtnl_dereference(tn->bearer_list[i]); - if (!b) { - 
bearer_id = i; - continue; - } + /* Check new bearer vs existing ones and find free bearer id if any */ + while (bearer_id < MAX_BEARERS) { + b = rtnl_dereference(tn->bearer_list[bearer_id]); + if (!b) + break; if (!strcmp(name, b->name)) { - pr_warn("Bearer <%s> rejected, already enabled\n", - name); - return -EINVAL; + errstr = "already enabled"; + goto rejected; } - if ((b->priority == priority) && - (++with_this_prio > 2)) { - if (priority-- == 0) { - pr_warn("Bearer <%s> rejected, duplicate priority\n", - name); - return -EINVAL; - } - pr_warn("Bearer <%s> priority adjustment required %u->%u\n", - name, priority + 1, priority); - goto restart; + bearer_id++; + if (b->priority != prio) + continue; + if (++with_this_prio <= 2) + continue; + pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", + name, prio); + if (prio == TIPC_MIN_LINK_PRI) { + errstr = "cannot adjust to lower"; + goto rejected; } + pr_warn("Bearer <%s>: trying with adjusted priority\n", name); + prio--; + bearer_id = 0; + with_this_prio = 1; } + if (bearer_id >= MAX_BEARERS) { - pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n", - name, MAX_BEARERS); - return -EINVAL; + errstr = "max 3 bearers permitted"; + goto rejected; } b = kzalloc(sizeof(*b), GFP_ATOMIC); @@ -322,10 +301,9 @@ restart: b->media = m; res = m->enable_media(net, b, attr); if (res) { - pr_warn("Bearer <%s> rejected, enable failure (%d)\n", - name, -res); kfree(b); - return -EINVAL; + errstr = "failed to enable media"; + goto rejected; } b->identity = bearer_id; @@ -333,15 +311,15 @@ restart: b->window = m->window; b->domain = disc_domain; b->net_plane = bearer_id + 'A'; - b->priority = priority; + b->priority = prio; test_and_set_bit_lock(0, &b->up); res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); - pr_warn("Bearer <%s> rejected, discovery object creation failed\n", - name); - return -EINVAL; + kfree(b); + errstr = "failed to create discoverer"; + goto rejected; } rcu_assign_pointer(tn->bearer_list[bearer_id], b); @@ -353,9 +331,11 @@ restart: return -ENOMEM; } - pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", - name, - tipc_addr_string_fill(addr_string, disc_domain), priority); + pr_info("Enabled bearer <%s>, priority %u\n", name, prio); + + return res; +rejected: + pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr); return res; } @@ -385,8 +365,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b) tipc_node_delete_links(net, bearer_id); b->media->disable_media(b); RCU_INIT_POINTER(b->media_ptr, NULL); - if (b->link_req) - tipc_disc_delete(b->link_req); + if (b->disc) + tipc_disc_delete(b->disc); RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); kfree_rcu(b, rcu); tipc_mon_delete(net, bearer_id); @@ -395,11 +375,13 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b) int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, struct nlattr *attr[]) { + char *dev_name = strchr((const char *)b->name, ':') + 1; + int hwaddr_len = b->media->hwaddr_len; + u8 node_id[NODE_ID_LEN] = {0,}; struct net_device *dev; - char *driver_name = strchr((const char *)b->name, ':') + 1; /* Find device with specified name */ - dev = dev_get_by_name(net, driver_name); + dev = dev_get_by_name(net, dev_name); if (!dev) return -ENODEV; if (tipc_mtu_bad(dev, 0)) { @@ -407,6 +389,16 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, return -EINVAL; } + /* Autoconfigure own node identity if needed */ + if (!tipc_own_id(net) && 
hwaddr_len <= NODE_ID_LEN) { + memcpy(node_id, dev->dev_addr, hwaddr_len); + tipc_net_init(net, node_id, 0); + } + if (!tipc_own_id(net)) { + pr_warn("Failed to obtain node identity\n"); + return -EINVAL; + } + /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); b->pt.dev = dev; @@ -414,7 +406,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, b->pt.func = tipc_l2_rcv_msg; dev_add_pack(&b->pt); memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); - memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); + memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len); b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; b->mtu = dev->mtu; @@ -861,12 +853,10 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 domain; + u32 domain = 0; u32 prio; prio = TIPC_MEDIA_LINK_PRI; - domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index a53613d95bc9..6efcee63a381 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -159,7 +159,7 @@ struct tipc_bearer { u32 tolerance; u32 domain; u32 identity; - struct tipc_link_req *link_req; + struct tipc_discoverer *disc; char net_plane; unsigned long up; }; diff --git a/net/tipc/core.c b/net/tipc/core.c index 0b982d048fb9..5b38f5164281 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -56,7 +56,11 @@ static int __net_init tipc_init_net(struct net *net) int err; tn->net_id = 4711; - tn->own_addr = 0; + tn->node_addr = 0; + tn->trial_addr = 0; + tn->addr_trial_end = 0; + memset(tn->node_id, 0, sizeof(tn->node_id)); + memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; get_random_bytes(&tn->random, sizeof(int)); INIT_LIST_HEAD(&tn->node_list); diff --git a/net/tipc/core.h b/net/tipc/core.h index ff8b071654f5..d0f64ca62d02 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -1,7 +1,7 @@ /* * net/tipc/core.h: Include file for TIPC global declarations * - * Copyright (c) 2005-2006, 2013 Ericsson AB + * Copyright (c) 2005-2006, 2013-2018 Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. 
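
The L2 media autoconfiguration above seeds a 128-bit node identity from the device hardware address when the user has not configured one. A standalone sketch of that derivation, mirroring the memcpy into the zero-filled 16-byte identity in tipc_enable_l2_media(); node_id_from_hwaddr is a hypothetical helper:

#include <stdint.h>
#include <string.h>

#define NODE_ID_LEN 16

/* A 6-byte MAC fills the first six bytes of the identity; the
 * remainder stays zero.
 */
static void node_id_from_hwaddr(uint8_t id[NODE_ID_LEN],
				const uint8_t *hwaddr, size_t hwaddr_len)
{
	memset(id, 0, NODE_ID_LEN);
	if (hwaddr_len <= NODE_ID_LEN)
		memcpy(id, hwaddr, hwaddr_len);
}
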
* @@ -72,15 +72,22 @@ struct tipc_monitor; #define NODE_HTABLE_SIZE 512 #define MAX_BEARERS 3 #define TIPC_DEF_MON_THRESHOLD 32 +#define NODE_ID_LEN 16 +#define NODE_ID_STR_LEN (NODE_ID_LEN * 2 + 1) extern unsigned int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; struct tipc_net { - u32 own_addr; + u8 node_id[NODE_ID_LEN]; + u32 node_addr; + u32 trial_addr; + unsigned long addr_trial_end; + char node_id_string[NODE_ID_STR_LEN]; int net_id; int random; + bool legacy_addr_format; /* Node table and node list */ spinlock_t node_list_lock; @@ -131,6 +138,11 @@ static inline struct list_head *tipc_nodes(struct net *net) return &tipc_net(net)->node_list; } +static inline struct name_table *tipc_name_table(struct net *net) +{ + return tipc_net(net)->nametbl; +} + static inline struct tipc_topsrv *tipc_topsrv(struct net *net) { return tipc_net(net)->topsrv; diff --git a/net/tipc/diag.c b/net/tipc/diag.c new file mode 100644 index 000000000000..46d9cd62f781 --- /dev/null +++ b/net/tipc/diag.c @@ -0,0 +1,114 @@ +/* + * net/tipc/diag.c: TIPC socket diag + * + * Copyright (c) 2018, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include "core.h" +#include "socket.h" +#include <linux/sock_diag.h> +#include <linux/tipc_sockets_diag.h> + +static u64 __tipc_diag_gen_cookie(struct sock *sk) +{ + u32 res[2]; + + sock_diag_save_cookie(sk, res); + return *((u64 *)res); +} + +static int __tipc_add_sock_diag(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk) +{ + struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh); + struct nlmsghdr *nlh; + int err; + + nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0, + NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + err = tipc_sk_fill_sock_diag(skb, tsk, req->tidiag_states, + __tipc_diag_gen_cookie); + if (err) + return err; + + nlmsg_end(skb, nlh); + return 0; +} + +static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag); +} + +static int tipc_sock_diag_handler_dump(struct sk_buff *skb, + struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct tipc_sock_diag_req); + struct net *net = sock_net(skb->sk); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = tipc_diag_dump, + }; + netlink_dump_start(net->diag_nlsk, skb, h, &c); + return 0; + } + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler tipc_sock_diag_handler = { + .family = AF_TIPC, + .dump = tipc_sock_diag_handler_dump, +}; + +static int __init tipc_diag_init(void) +{ + return sock_diag_register(&tipc_sock_diag_handler); +} + +static void __exit tipc_diag_exit(void) +{ + sock_diag_unregister(&tipc_sock_diag_handler); +} + +module_init(tipc_diag_init); +module_exit(tipc_diag_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 92e4828c6b09..9f666e0650e2 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -1,7 +1,7 @@ /* * net/tipc/discover.c * - * Copyright (c) 2003-2006, 2014-2015, Ericsson AB + * Copyright (c) 2003-2006, 2014-2018, Ericsson AB * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. * @@ -39,34 +39,34 @@ #include "discover.h" /* min delay during bearer start up */ -#define TIPC_LINK_REQ_INIT msecs_to_jiffies(125) +#define TIPC_DISC_INIT msecs_to_jiffies(125) /* max delay if bearer has no links */ -#define TIPC_LINK_REQ_FAST msecs_to_jiffies(1000) +#define TIPC_DISC_FAST msecs_to_jiffies(1000) /* max delay if bearer has links */ -#define TIPC_LINK_REQ_SLOW msecs_to_jiffies(60000) +#define TIPC_DISC_SLOW msecs_to_jiffies(60000) /* indicates no timer in use */ -#define TIPC_LINK_REQ_INACTIVE 0xffffffff +#define TIPC_DISC_INACTIVE 0xffffffff /** - * struct tipc_link_req - information about an ongoing link setup request + * struct tipc_discoverer - information about an ongoing link setup request * @bearer_id: identity of bearer issuing requests * @net: network namespace instance * @dest: destination address for request messages * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. 
with an active link) * @lock: spinlock for controlling access to requests - * @buf: request message to be (repeatedly) sent + * @skb: request message to be (repeatedly) sent * @timer: timer governing period between requests * @timer_intv: current interval between requests (in ms) */ -struct tipc_link_req { +struct tipc_discoverer { u32 bearer_id; struct tipc_media_addr dest; struct net *net; u32 domain; int num_nodes; spinlock_t lock; - struct sk_buff *buf; + struct sk_buff *skb; struct timer_list timer; unsigned long timer_intv; }; @@ -77,22 +77,42 @@ struct tipc_link_req { * @type: message type (request or response) * @b: ptr to bearer issuing message */ -static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, - struct tipc_bearer *b) +static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, + u32 mtyp, struct tipc_bearer *b) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_msg *msg; + struct tipc_net *tn = tipc_net(net); u32 dest_domain = b->domain; + struct tipc_msg *hdr; - msg = buf_msg(buf); - tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, + hdr = buf_msg(skb); + tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp, MAX_H_SIZE, dest_domain); - msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tn->random); - msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES); - msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tn->net_id); - b->media->addr2msg(msg_media_addr(msg), &b->addr); + msg_set_size(hdr, MAX_H_SIZE + NODE_ID_LEN); + msg_set_non_seq(hdr, 1); + msg_set_node_sig(hdr, tn->random); + msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES); + msg_set_dest_domain(hdr, dest_domain); + msg_set_bc_netid(hdr, tn->net_id); + b->media->addr2msg(msg_media_addr(hdr), &b->addr); + msg_set_node_id(hdr, tipc_own_id(net)); +} + +static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst, + u32 src, u32 sugg_addr, + struct tipc_media_addr *maddr, + struct tipc_bearer *b) +{ + struct tipc_msg *hdr; + struct sk_buff *skb; + + skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC); + if (!skb) + return; + hdr = buf_msg(skb); + tipc_disc_init_msg(net, skb, mtyp, b); + msg_set_sugg_node_addr(hdr, sugg_addr); + msg_set_dest_domain(hdr, dst); + tipc_bearer_xmit_skb(net, b->identity, skb, maddr); } /** @@ -104,161 +124,207 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr, struct tipc_media_addr *media_addr) { - char node_addr_str[16]; char media_addr_str[64]; - tipc_addr_string_fill(node_addr_str, node_addr); tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str), media_addr); - pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str, + pr_warn("Duplicate %x using %s seen on <%s>\n", node_addr, media_addr_str, b->name); } +/* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer + */ +static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, + struct tipc_media_addr *maddr, + struct tipc_bearer *b, + u32 dst, u32 src, + u32 sugg_addr, + u8 *peer_id, + int mtyp) +{ + struct net *net = d->net; + struct tipc_net *tn = tipc_net(net); + bool trial = time_before(jiffies, tn->addr_trial_end); + u32 self = tipc_own_addr(net); + + if (mtyp == DSC_TRIAL_FAIL_MSG) { + if (!trial) + return true; + + /* Ignore if somebody else already gave new suggestion */ + if (dst != tn->trial_addr) + return true; + + /* Otherwise update trial address and restart trial period */ + tn->trial_addr = sugg_addr; + 
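/* The peer vetoed our trial address and proposed another one: stamp the
 * suggestion into the cached discovery message as originating node and
 * restart the one-second trial window before it may be applied.
 */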
msg_set_prevnode(buf_msg(d->skb), sugg_addr); + tn->addr_trial_end = jiffies + msecs_to_jiffies(1000); + return true; + } + + /* Apply trial address if we just left trial period */ + if (!trial && !self) { + tipc_net_finalize(net, tn->trial_addr); + msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); + } + + if (mtyp != DSC_TRIAL_MSG) + return false; + + sugg_addr = tipc_node_try_addr(net, peer_id, src); + if (sugg_addr) + tipc_disc_msg_xmit(net, DSC_TRIAL_FAIL_MSG, src, + self, sugg_addr, maddr, b); + return true; +} + /** * tipc_disc_rcv - handle incoming discovery message (request or response) - * @net: the applicable net namespace - * @buf: buffer containing message - * @bearer: bearer that message arrived on + * @net: applicable net namespace + * @skb: buffer containing message + * @b: bearer that message arrived on */ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, - struct tipc_bearer *bearer) + struct tipc_bearer *b) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_media_addr maddr; - struct sk_buff *rskb; + struct tipc_net *tn = tipc_net(net); struct tipc_msg *hdr = buf_msg(skb); - u32 ddom = msg_dest_domain(hdr); - u32 onode = msg_prevnode(hdr); + u16 caps = msg_node_capabilities(hdr); + bool legacy = tn->legacy_addr_format; + u32 sugg = msg_sugg_node_addr(hdr); + u32 signature = msg_node_sig(hdr); + u8 peer_id[NODE_ID_LEN] = {0,}; + u32 dst = msg_dest_domain(hdr); u32 net_id = msg_bc_netid(hdr); + struct tipc_media_addr maddr; + u32 src = msg_prevnode(hdr); u32 mtyp = msg_type(hdr); - u32 signature = msg_node_sig(hdr); - u16 caps = msg_node_capabilities(hdr); - bool respond = false; bool dupl_addr = false; + bool respond = false; + u32 self; int err; - err = bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr)); - kfree_skb(skb); - if (err) - return; + skb_linearize(skb); + hdr = buf_msg(skb); - /* Ensure message from node is valid and communication is permitted */ - if (net_id != tn->net_id) + if (caps & TIPC_NODE_ID128) + memcpy(peer_id, msg_node_id(hdr), NODE_ID_LEN); + else + sprintf(peer_id, "%x", src); + + err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr)); + kfree_skb(skb); + if (err || maddr.broadcast) { + pr_warn_ratelimited("Rcv corrupt discovery message\n"); return; - if (maddr.broadcast) + } + /* Ignore discovery messages from own node */ + if (!memcmp(&maddr, &b->addr, sizeof(maddr))) return; - if (!tipc_addr_domain_valid(ddom)) + if (net_id != tn->net_id) return; - if (!tipc_addr_node_valid(onode)) + if (tipc_disc_addr_trial_msg(b->disc, &maddr, b, dst, + src, sugg, peer_id, mtyp)) return; + self = tipc_own_addr(net); - if (in_own_node(net, onode)) { - if (memcmp(&maddr, &bearer->addr, sizeof(maddr))) - disc_dupl_alert(bearer, tn->own_addr, &maddr); + /* Message from somebody using this node's address */ + if (in_own_node(net, src)) { + disc_dupl_alert(b, self, &maddr); return; } - if (!tipc_in_scope(ddom, tn->own_addr)) + if (!tipc_in_scope(legacy, dst, self)) return; - if (!tipc_in_scope(bearer->domain, onode)) + if (!tipc_in_scope(legacy, b->domain, src)) return; - - tipc_node_check_dest(net, onode, bearer, caps, signature, + tipc_node_check_dest(net, src, peer_id, b, caps, signature, &maddr, &respond, &dupl_addr); if (dupl_addr) - disc_dupl_alert(bearer, onode, &maddr); - - /* Send response, if necessary */ - if (respond && (mtyp == DSC_REQ_MSG)) { - rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); - if (!rskb) - return; - tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); - tipc_bearer_xmit_skb(net, bearer->identity, rskb, 
&maddr); - } + disc_dupl_alert(b, src, &maddr); + if (!respond) + return; + if (mtyp != DSC_REQ_MSG) + return; + tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, 0, &maddr, b); } -/** - * disc_update - update frequency of periodic link setup requests - * @req: ptr to link request structure - * - * Reinitiates discovery process if discovery object has no associated nodes - * and is either not currently searching or is searching at a slow rate +/* tipc_disc_add_dest - increment set of discovered nodes */ -static void disc_update(struct tipc_link_req *req) +void tipc_disc_add_dest(struct tipc_discoverer *d) { - if (!req->num_nodes) { - if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) || - (req->timer_intv > TIPC_LINK_REQ_FAST)) { - req->timer_intv = TIPC_LINK_REQ_INIT; - mod_timer(&req->timer, jiffies + req->timer_intv); - } - } + spin_lock_bh(&d->lock); + d->num_nodes++; + spin_unlock_bh(&d->lock); } -/** - * tipc_disc_add_dest - increment set of discovered nodes - * @req: ptr to link request structure +/* tipc_disc_remove_dest - decrement set of discovered nodes */ -void tipc_disc_add_dest(struct tipc_link_req *req) +void tipc_disc_remove_dest(struct tipc_discoverer *d) { - spin_lock_bh(&req->lock); - req->num_nodes++; - spin_unlock_bh(&req->lock); -} + int intv, num; -/** - * tipc_disc_remove_dest - decrement set of discovered nodes - * @req: ptr to link request structure - */ -void tipc_disc_remove_dest(struct tipc_link_req *req) -{ - spin_lock_bh(&req->lock); - req->num_nodes--; - disc_update(req); - spin_unlock_bh(&req->lock); + spin_lock_bh(&d->lock); + d->num_nodes--; + num = d->num_nodes; + intv = d->timer_intv; + if (!num && (intv == TIPC_DISC_INACTIVE || intv > TIPC_DISC_FAST)) { + d->timer_intv = TIPC_DISC_INIT; + mod_timer(&d->timer, jiffies + d->timer_intv); + } + spin_unlock_bh(&d->lock); } -/** - * disc_timeout - send a periodic link setup request - * @data: ptr to link request structure - * +/* tipc_disc_timeout - send a periodic link setup request * Called whenever a link setup request timer associated with a bearer expires. 
+ * - Keep doubling time between sent requests until limit is reached; + * - Hold at fast polling rate if we don't have any associated nodes + * - Otherwise hold at slow polling rate */ -static void disc_timeout(struct timer_list *t) +static void tipc_disc_timeout(struct timer_list *t) { - struct tipc_link_req *req = from_timer(req, t, timer); - struct sk_buff *skb; - int max_delay; + struct tipc_discoverer *d = from_timer(d, t, timer); + struct tipc_net *tn = tipc_net(d->net); + u32 self = tipc_own_addr(d->net); + struct tipc_media_addr maddr; + struct sk_buff *skb = NULL; + struct net *net = d->net; + u32 bearer_id; - spin_lock_bh(&req->lock); + spin_lock_bh(&d->lock); /* Stop searching if only desired node has been found */ - if (tipc_node(req->domain) && req->num_nodes) { - req->timer_intv = TIPC_LINK_REQ_INACTIVE; + if (tipc_node(d->domain) && d->num_nodes) { + d->timer_intv = TIPC_DISC_INACTIVE; goto exit; } - /* - * Send discovery message, then update discovery timer - * - * Keep doubling time between requests until limit is reached; - * hold at fast polling rate if don't have any associated nodes, - * otherwise hold at slow polling rate - */ - skb = skb_clone(req->buf, GFP_ATOMIC); - if (skb) - tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest); - req->timer_intv *= 2; - if (req->num_nodes) - max_delay = TIPC_LINK_REQ_SLOW; - else - max_delay = TIPC_LINK_REQ_FAST; - if (req->timer_intv > max_delay) - req->timer_intv = max_delay; + /* Did we just leave the address trial period ? */ + if (!self && !time_before(jiffies, tn->addr_trial_end)) { + self = tn->trial_addr; + tipc_net_finalize(net, self); + msg_set_prevnode(buf_msg(d->skb), self); + msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); + } + + /* Adjust timeout interval according to discovery phase */ + if (time_before(jiffies, tn->addr_trial_end)) { + d->timer_intv = TIPC_DISC_INIT; + } else { + d->timer_intv *= 2; + if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW) + d->timer_intv = TIPC_DISC_SLOW; + else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST) + d->timer_intv = TIPC_DISC_FAST; + } - mod_timer(&req->timer, jiffies + req->timer_intv); + mod_timer(&d->timer, jiffies + d->timer_intv); + memcpy(&maddr, &d->dest, sizeof(maddr)); + skb = skb_clone(d->skb, GFP_ATOMIC); + bearer_id = d->bearer_id; exit: - spin_unlock_bh(&req->lock); + spin_unlock_bh(&d->lock); + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr); } /** @@ -273,41 +339,47 @@ exit: int tipc_disc_create(struct net *net, struct tipc_bearer *b, struct tipc_media_addr *dest, struct sk_buff **skb) { - struct tipc_link_req *req; + struct tipc_net *tn = tipc_net(net); + struct tipc_discoverer *d; - req = kmalloc(sizeof(*req), GFP_ATOMIC); - if (!req) + d = kmalloc(sizeof(*d), GFP_ATOMIC); + if (!d) return -ENOMEM; - req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); - if (!req->buf) { - kfree(req); + d->skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC); + if (!d->skb) { + kfree(d); return -ENOMEM; } + tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b); - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); - memcpy(&req->dest, dest, sizeof(*dest)); - req->net = net; - req->bearer_id = b->identity; - req->domain = b->domain; - req->num_nodes = 0; - req->timer_intv = TIPC_LINK_REQ_INIT; - spin_lock_init(&req->lock); - timer_setup(&req->timer, disc_timeout, 0); - mod_timer(&req->timer, jiffies + req->timer_intv); - b->link_req = req; - *skb = skb_clone(req->buf, GFP_ATOMIC); + /* Do we need an address trial period first ?
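 * A node that still has no 32-bit address must advertise a tentative
 * one in DSC_TRIAL_MSG and give its peers a one-second window to
 * object before the address is finalized.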
*/ + if (!tipc_own_addr(net)) { + tn->addr_trial_end = jiffies + msecs_to_jiffies(1000); + msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG); + } + memcpy(&d->dest, dest, sizeof(*dest)); + d->net = net; + d->bearer_id = b->identity; + d->domain = b->domain; + d->num_nodes = 0; + d->timer_intv = TIPC_DISC_INIT; + spin_lock_init(&d->lock); + timer_setup(&d->timer, tipc_disc_timeout, 0); + mod_timer(&d->timer, jiffies + d->timer_intv); + b->disc = d; + *skb = skb_clone(d->skb, GFP_ATOMIC); return 0; } /** * tipc_disc_delete - destroy object sending periodic link setup requests - * @req: ptr to link request structure + * @d: ptr to link discoverer structure */ -void tipc_disc_delete(struct tipc_link_req *req) +void tipc_disc_delete(struct tipc_discoverer *d) { - del_timer_sync(&req->timer); - kfree_skb(req->buf); - kfree(req); + del_timer_sync(&d->timer); + kfree_skb(d->skb); + kfree(d); } /** @@ -318,19 +390,21 @@ void tipc_disc_delete(struct tipc_link_req *req) */ void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { - struct tipc_link_req *req = b->link_req; + struct tipc_discoverer *d = b->disc; + struct tipc_media_addr maddr; struct sk_buff *skb; - spin_lock_bh(&req->lock); - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); - req->net = net; - req->bearer_id = b->identity; - req->domain = b->domain; - req->num_nodes = 0; - req->timer_intv = TIPC_LINK_REQ_INIT; - mod_timer(&req->timer, jiffies + req->timer_intv); - skb = skb_clone(req->buf, GFP_ATOMIC); + spin_lock_bh(&d->lock); + tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b); + d->net = net; + d->bearer_id = b->identity; + d->domain = b->domain; + d->num_nodes = 0; + d->timer_intv = TIPC_DISC_INIT; + memcpy(&maddr, &d->dest, sizeof(maddr)); + mod_timer(&d->timer, jiffies + d->timer_intv); + skb = skb_clone(d->skb, GFP_ATOMIC); + spin_unlock_bh(&d->lock); if (skb) - tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); - spin_unlock_bh(&req->lock); + tipc_bearer_xmit_skb(net, b->identity, skb, &maddr); } diff --git a/net/tipc/discover.h b/net/tipc/discover.h index b80a335389c0..521d96c41dfd 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -37,14 +37,14 @@ #ifndef _TIPC_DISCOVER_H #define _TIPC_DISCOVER_H -struct tipc_link_req; +struct tipc_discoverer; int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, struct sk_buff **skb); -void tipc_disc_delete(struct tipc_link_req *req); +void tipc_disc_delete(struct tipc_discoverer *req); void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); -void tipc_disc_add_dest(struct tipc_link_req *req); -void tipc_disc_remove_dest(struct tipc_link_req *req); +void tipc_disc_add_dest(struct tipc_discoverer *req); +void tipc_disc_remove_dest(struct tipc_discoverer *req); void tipc_disc_rcv(struct net *net, struct sk_buff *buf, struct tipc_bearer *b_ptr); diff --git a/net/tipc/group.c b/net/tipc/group.c index 03086ccb7746..d7a7befeddd4 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -189,6 +189,7 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; grp->open = group_is_open; + *grp->open = false; filter |= global ?
TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, filter, &grp->subid)) diff --git a/net/tipc/link.c b/net/tipc/link.c index 3c230466804d..1289b4ba404f 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -434,14 +434,16 @@ char *tipc_link_name(struct tipc_link *l) */ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, - int window, u32 session, u32 ownnode, u32 peer, - u16 peer_caps, + int window, u32 session, u32 self, + u32 peer, u8 *peer_id, u16 peer_caps, struct tipc_link *bc_sndlink, struct tipc_link *bc_rcvlink, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link **link) { + char peer_str[NODE_ID_STR_LEN] = {0,}; + char self_str[NODE_ID_STR_LEN] = {0,}; struct tipc_link *l; l = kzalloc(sizeof(*l), GFP_ATOMIC); @@ -450,10 +452,18 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, *link = l; l->session = session; - /* Note: peer i/f name is completed by reset/activate message */ - sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown", - tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode), - if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); + /* Set link name for unicast links only */ + if (peer_id) { + tipc_nodeid2string(self_str, tipc_own_id(net)); + if (strlen(self_str) > 16) + sprintf(self_str, "%x", self); + tipc_nodeid2string(peer_str, peer_id); + if (strlen(peer_str) > 16) + sprintf(peer_str, "%x", peer); + } + /* Peer i/f name will be completed by reset/activate message */ + sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str); + strcpy(l->if_name, if_name); l->addr = peer; l->peer_caps = peer_caps; @@ -501,7 +511,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, struct tipc_link *l; if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window, - 0, ownnode, peer, peer_caps, bc_sndlink, + 0, ownnode, peer, NULL, peer_caps, bc_sndlink, NULL, inputq, namedq, link)) return false; @@ -1938,11 +1948,11 @@ msg_full: int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *link, int nlflags) { - int err; - void *hdr; + u32 self = tipc_own_addr(net); struct nlattr *attrs; struct nlattr *prop; - struct tipc_net *tn = net_generic(net, tipc_net_id); + void *hdr; + int err; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, nlflags, TIPC_NL_LINK_GET); @@ -1955,8 +1965,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, - tipc_cluster_mask(tn->own_addr))) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self))) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) goto attr_msg_full; diff --git a/net/tipc/link.h b/net/tipc/link.h index d1bd1787a768..ec59348a81e8 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -73,8 +73,8 @@ enum { bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, - int window, u32 session, u32 ownnode, u32 peer, - u16 peer_caps, + int window, u32 session, u32 ownnode, + u32 peer, u8 *peer_id, u16 peer_caps, struct tipc_link *bc_sndlink, struct tipc_link *bc_rcvlink, struct sk_buff_head *inputq, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 4e1c6f6450bb..b6c45dccba3d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -580,7 +580,7 @@ bool 
tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg = buf_msg(skb); if (msg_reroute_cnt(msg)) return false; - dnode = addr_domain(net, msg_lookup_scope(msg)); + dnode = tipc_scope2node(net, msg_lookup_scope(msg)); dport = tipc_nametbl_translate(net, msg_nametype(msg), msg_nameinst(msg), &dnode); if (!dport) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index b4ba1b4f9ae7..a4e944d59394 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -550,6 +550,8 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) */ #define DSC_REQ_MSG 0 #define DSC_RESP_MSG 1 +#define DSC_TRIAL_MSG 2 +#define DSC_TRIAL_FAIL_MSG 3 /* * Group protocol message types @@ -627,7 +629,6 @@ static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n) msg_set_bits(m, 2, 0, 0xffff, n); } - /* * Word 4 */ @@ -925,6 +926,26 @@ static inline bool msg_is_reset(struct tipc_msg *hdr) return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); } +static inline u32 msg_sugg_node_addr(struct tipc_msg *m) +{ + return msg_word(m, 14); +} + +static inline void msg_set_sugg_node_addr(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 14, n); +} + +static inline void msg_set_node_id(struct tipc_msg *hdr, u8 *id) +{ + memcpy(msg_data(hdr), id, 16); +} + +static inline u8 *msg_node_id(struct tipc_msg *hdr) +{ + return (u8 *)msg_data(hdr); +} + struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); bool tipc_msg_validate(struct sk_buff **_skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 23f8899e0f8c..8240a85b0d0c 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -56,7 +56,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p) i->type = htonl(p->type); i->lower = htonl(p->lower); i->upper = htonl(p->upper); - i->ref = htonl(p->ref); + i->port = htonl(p->port); i->key = htonl(p->key); } @@ -68,14 +68,14 @@ static void publ_to_item(struct distr_item *i, struct publication *p) static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, u32 dest) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC); + u32 self = tipc_own_addr(net); struct tipc_msg *msg; if (buf != NULL) { msg = buf_msg(buf); - tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type, - INT_H_SIZE, dest); + tipc_msg_init(self, msg, NAME_DISTRIBUTOR, + type, INT_H_SIZE, dest); msg_set_size(msg, INT_H_SIZE + size); } return buf; @@ -86,25 +86,25 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, */ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *buf; + struct name_table *nt = tipc_name_table(net); struct distr_item *item; + struct sk_buff *skb; - list_add_tail_rcu(&publ->local_list, - &tn->nametbl->publ_list[publ->scope]); - - if (publ->scope == TIPC_NODE_SCOPE) + if (publ->scope == TIPC_NODE_SCOPE) { + list_add_tail_rcu(&publ->binding_node, &nt->node_scope); return NULL; + } + list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope); - buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); - if (!buf) { + skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); + if (!skb) { pr_warn("Publication distribution failure\n"); return NULL; } - item = (struct distr_item *)msg_data(buf_msg(buf)); + item = (struct distr_item *)msg_data(buf_msg(skb)); publ_to_item(item, publ); - return buf; + return 
skb; } /** @@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) struct sk_buff *buf; struct distr_item *item; - list_del(&publ->local_list); + list_del(&publ->binding_node); if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, ITEM_SIZE) * ITEM_SIZE; u32 msg_rem = msg_dsz; - list_for_each_entry(publ, pls, local_list) { + list_for_each_entry(publ, pls, binding_node) { /* Prepare next buffer: */ if (!skb) { skb = named_prepare_buf(net, PUBLICATION, msg_rem, @@ -184,16 +184,13 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, */ void tipc_named_node_up(struct net *net, u32 dnode) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *nt = tipc_name_table(net); struct sk_buff_head head; __skb_queue_head_init(&head); rcu_read_lock(); - named_distribute(net, &head, dnode, - &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - named_distribute(net, &head, dnode, - &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); + named_distribute(net, &head, dnode, &nt->cluster_scope); rcu_read_unlock(); tipc_node_xmit(net, &head, dnode, 0); @@ -212,15 +209,15 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) spin_lock_bh(&tn->nametbl_lock); p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, - publ->node, publ->ref, publ->key); + publ->node, publ->port, publ->key); if (p) - tipc_node_unsubscribe(net, &p->nodesub_list, addr); + tipc_node_unsubscribe(net, &p->binding_node, addr); spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { pr_err("Unable to remove publication from failed node\n" - " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n", - publ->type, publ->lower, publ->node, publ->ref, + " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n", + publ->type, publ->lower, publ->node, publ->port, publ->key); } @@ -249,7 +246,7 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; - list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) + list_for_each_entry_safe(publ, tmp, nsub_list, binding_node) tipc_publ_purge(net, publ, addr); tipc_dist_queue_purge(net, addr); } @@ -271,18 +268,18 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, ntohl(i->lower), ntohl(i->upper), TIPC_CLUSTER_SCOPE, node, - ntohl(i->ref), ntohl(i->key)); + ntohl(i->port), ntohl(i->key)); if (publ) { - tipc_node_subscribe(net, &publ->nodesub_list, node); + tipc_node_subscribe(net, &publ->binding_node, node); return true; } } else if (dtype == WITHDRAWAL) { publ = tipc_nametbl_remove_publ(net, ntohl(i->type), ntohl(i->lower), - node, ntohl(i->ref), + node, ntohl(i->port), ntohl(i->key)); if (publ) { - tipc_node_unsubscribe(net, &publ->nodesub_list, node); + tipc_node_unsubscribe(net, &publ->binding_node, node); kfree_rcu(publ, rcu); return true; } @@ -321,7 +318,6 @@ void tipc_named_process_backlog(struct net *net) { struct distr_queue_item *e, *tmp; struct tipc_net *tn = net_generic(net, tipc_net_id); - char addr[16]; unsigned long now = get_jiffies_64(); list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { @@ -329,12 +325,11 @@ void tipc_named_process_backlog(struct net *net) if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) continue; } else { - tipc_addr_string_fill(addr, e->node); - pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %s key=%u\n", + pr_warn_ratelimited("Dropping name table update (%d) of {%u, 
%u, %u} from %x key=%u\n", e->dtype, ntohl(e->i.type), ntohl(e->i.lower), ntohl(e->i.upper), - addr, ntohl(e->i.key)); + e->node, ntohl(e->i.key)); } list_del(&e->next); kfree(e); @@ -382,16 +377,17 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) */ void tipc_named_reinit(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); struct publication *publ; - int scope; + u32 self = tipc_own_addr(net); spin_lock_bh(&tn->nametbl_lock); - for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) - list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope], - local_list) - publ->node = tn->own_addr; + list_for_each_entry_rcu(publ, &nt->node_scope, binding_node) + publ->node = self; + list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node) + publ->node = self; spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 1264ba0af937..4753e628d7c4 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -63,7 +63,7 @@ struct distr_item { __be32 type; __be32 lower; __be32 upper; - __be32 ref; + __be32 port; __be32 key; }; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index e01c9c691ba2..4359605b1bec 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1,7 +1,7 @@ /* * net/tipc/name_table.c: TIPC name table code * - * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2008, 2010-2014, Wind River Systems * All rights reserved. * @@ -50,24 +50,12 @@ /** * struct name_info - name sequence publication info - * @node_list: circular list of publications made by own node - * @cluster_list: circular list of publications made by own cluster - * @zone_list: circular list of publications made by own zone - * @node_list_size: number of entries in "node_list" - * @cluster_list_size: number of entries in "cluster_list" - * @zone_list_size: number of entries in "zone_list" - * - * Note: The zone list always contains at least one entry, since all - * publications of the associated name sequence belong to it. - * (The cluster and node lists may be empty.) 
+ * @local_publ: list of publications on own node of this <type,lower,upper> + * @all_publ: list of all publications of this <type,lower,upper> */ struct name_info { - struct list_head node_list; - struct list_head cluster_list; - struct list_head zone_list; - u32 node_list_size; - u32 cluster_list_size; - u32 zone_list_size; + struct list_head local_publ; + struct list_head all_publ; }; /** @@ -114,7 +102,7 @@ static int hash(int x) * publ_create - create a publication structure */ static struct publication *publ_create(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port_ref, + u32 scope, u32 node, u32 port, u32 key) { struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC); @@ -128,9 +116,9 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper, publ->upper = upper; publ->scope = scope; publ->node = node; - publ->ref = port_ref; + publ->port = port; publ->key = key; - INIT_LIST_HEAD(&publ->pport_list); + INIT_LIST_HEAD(&publ->binding_sock); return publ; } @@ -249,9 +237,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, info = sseq->info; /* Check if an identical publication already exists */ - list_for_each_entry(publ, &info->zone_list, zone_list) { - if ((publ->ref == port) && (publ->key == key) && - (!publ->node || (publ->node == node))) + list_for_each_entry(publ, &info->all_publ, all_publ) { + if (publ->port == port && publ->key == key && + (!publ->node || publ->node == node)) return NULL; } } else { @@ -290,9 +278,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, return NULL; } - INIT_LIST_HEAD(&info->node_list); - INIT_LIST_HEAD(&info->cluster_list); - INIT_LIST_HEAD(&info->zone_list); + INIT_LIST_HEAD(&info->local_publ); + INIT_LIST_HEAD(&info->all_publ); /* Insert new sub-sequence */ sseq = &nseq->sseqs[inspos]; @@ -311,23 +298,15 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, if (!publ) return NULL; - list_add(&publ->zone_list, &info->zone_list); - info->zone_list_size++; - - if (in_own_cluster(net, node)) { - list_add(&publ->cluster_list, &info->cluster_list); - info->cluster_list_size++; - } + list_add(&publ->all_publ, &info->all_publ); - if (in_own_node(net, node)) { - list_add(&publ->node_list, &info->node_list); - info->node_list_size++; - } + if (in_own_node(net, node)) + list_add(&publ->local_publ, &info->local_publ); /* Any subscriptions waiting for notification?
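 * Every subscription overlapping the new publication is notified
 * below with a TIPC_PUBLISHED event via tipc_sub_report_overlap().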
*/ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_sub_report_overlap(s, publ->lower, publ->upper, - TIPC_PUBLISHED, publ->ref, + TIPC_PUBLISHED, publ->port, publ->node, publ->scope, created_subseq); } @@ -348,7 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, static struct publication *tipc_nameseq_remove_publ(struct net *net, struct name_seq *nseq, u32 inst, u32 node, - u32 ref, u32 key) + u32 port, u32 key) { struct publication *publ; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); @@ -363,32 +342,20 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net, info = sseq->info; /* Locate publication, if it exists */ - list_for_each_entry(publ, &info->zone_list, zone_list) { - if ((publ->key == key) && (publ->ref == ref) && - (!publ->node || (publ->node == node))) + list_for_each_entry(publ, &info->all_publ, all_publ) { + if (publ->key == key && publ->port == port && + (!publ->node || publ->node == node)) goto found; } return NULL; found: - /* Remove publication from zone scope list */ - list_del(&publ->zone_list); - info->zone_list_size--; - - /* Remove publication from cluster scope list, if present */ - if (in_own_cluster(net, node)) { - list_del(&publ->cluster_list); - info->cluster_list_size--; - } - - /* Remove publication from node scope list, if present */ - if (in_own_node(net, node)) { - list_del(&publ->node_list); - info->node_list_size--; - } + list_del(&publ->all_publ); + if (in_own_node(net, node)) + list_del(&publ->local_publ); /* Contract subseq list if no more publications for that subseq */ - if (list_empty(&info->zone_list)) { + if (list_empty(&info->all_publ)) { kfree(info); free = &nseq->sseqs[nseq->first_free--]; memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq)); @@ -398,8 +365,9 @@ found: /* Notify any waiting subscriptions */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_sub_report_overlap(s, publ->lower, publ->upper, - TIPC_WITHDRAWN, publ->ref, publ->node, - publ->scope, removed_subseq); + TIPC_WITHDRAWN, publ->port, + publ->node, publ->scope, + removed_subseq); } return publ; @@ -435,11 +403,12 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, struct name_info *info = sseq->info; int must_report = 1; - list_for_each_entry(crs, &info->zone_list, zone_list) { + list_for_each_entry(crs, &info->all_publ, all_publ) { tipc_sub_report_overlap(sub, sseq->lower, sseq->upper, TIPC_PUBLISHED, - crs->ref, crs->node, + crs->port, + crs->node, crs->scope, must_report); must_report = 0; @@ -473,8 +442,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, struct name_seq *seq = nametbl_find_seq(net, type); int index = hash(type); - if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) || - (lower > upper)) { + if (scope > TIPC_NODE_SCOPE || lower > upper) { pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n", type, lower, upper, scope); return NULL; @@ -493,7 +461,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, } struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, - u32 lower, u32 node, u32 ref, + u32 lower, u32 node, u32 port, u32 key) { struct publication *publ; @@ -503,7 +471,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key); + publ = tipc_nameseq_remove_publ(net, seq, lower, node, port, key); if (!seq->first_free && 
list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); @@ -531,15 +499,17 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *destnode) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); + bool legacy = tn->legacy_addr_format; + u32 self = tipc_own_addr(net); struct sub_seq *sseq; struct name_info *info; struct publication *publ; struct name_seq *seq; - u32 ref = 0; + u32 port = 0; u32 node = 0; - if (!tipc_in_scope(*destnode, tn->own_addr)) + if (!tipc_in_scope(legacy, *destnode, self)) return 0; rcu_read_lock(); @@ -553,55 +523,43 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, info = sseq->info; /* Closest-First Algorithm */ - if (likely(!*destnode)) { - if (!list_empty(&info->node_list)) { - publ = list_first_entry(&info->node_list, + if (legacy && !*destnode) { + if (!list_empty(&info->local_publ)) { + publ = list_first_entry(&info->local_publ, struct publication, - node_list); - list_move_tail(&publ->node_list, - &info->node_list); - } else if (!list_empty(&info->cluster_list)) { - publ = list_first_entry(&info->cluster_list, - struct publication, - cluster_list); - list_move_tail(&publ->cluster_list, - &info->cluster_list); + local_publ); + list_move_tail(&publ->local_publ, + &info->local_publ); } else { - publ = list_first_entry(&info->zone_list, + publ = list_first_entry(&info->all_publ, struct publication, - zone_list); - list_move_tail(&publ->zone_list, - &info->zone_list); + all_publ); + list_move_tail(&publ->all_publ, + &info->all_publ); } } /* Round-Robin Algorithm */ - else if (*destnode == tn->own_addr) { - if (list_empty(&info->node_list)) - goto no_match; - publ = list_first_entry(&info->node_list, struct publication, - node_list); - list_move_tail(&publ->node_list, &info->node_list); - } else if (in_own_cluster_exact(net, *destnode)) { - if (list_empty(&info->cluster_list)) + else if (*destnode == tipc_own_addr(net)) { + if (list_empty(&info->local_publ)) goto no_match; - publ = list_first_entry(&info->cluster_list, struct publication, - cluster_list); - list_move_tail(&publ->cluster_list, &info->cluster_list); + publ = list_first_entry(&info->local_publ, struct publication, + local_publ); + list_move_tail(&publ->local_publ, &info->local_publ); } else { - publ = list_first_entry(&info->zone_list, struct publication, - zone_list); - list_move_tail(&publ->zone_list, &info->zone_list); + publ = list_first_entry(&info->all_publ, struct publication, + all_publ); + list_move_tail(&publ->all_publ, &info->all_publ); } - ref = publ->ref; + port = publ->port; node = publ->node; no_match: spin_unlock_bh(&seq->lock); not_found: rcu_read_unlock(); *destnode = node; - return ref; + return port; } bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, @@ -623,16 +581,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, sseq = nameseq_find_subseq(seq, instance); if (likely(sseq)) { info = sseq->info; - list_for_each_entry(publ, &info->zone_list, zone_list) { + list_for_each_entry(publ, &info->all_publ, all_publ) { if (publ->scope != scope) continue; - if (publ->ref == exclude && publ->node == self) + if (publ->port == exclude && publ->node == self) continue; - tipc_dest_push(dsts, publ->node, publ->ref); + tipc_dest_push(dsts, publ->node, publ->port); (*dstcnt)++; if (all) continue; - list_move_tail(&publ->zone_list, &info->zone_list); + 
list_move_tail(&publ->all_publ, &info->all_publ); break; } } @@ -642,15 +600,14 @@ exit: return !list_empty(dsts); } -int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports) +void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports) { struct sub_seq *sseq_stop; struct name_info *info; struct publication *p; struct name_seq *seq; struct sub_seq *sseq; - int res = 0; rcu_read_lock(); seq = nametbl_find_seq(net, type); @@ -664,18 +621,14 @@ int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, if (sseq->lower > upper) break; info = sseq->info; - list_for_each_entry(p, &info->node_list, node_list) { + list_for_each_entry(p, &info->local_publ, local_publ) { if (p->scope == scope || (!exact && p->scope < scope)) - tipc_dest_push(dports, 0, p->ref); + tipc_dest_push(dports, 0, p->port); } - - if (info->cluster_list_size != info->node_list_size) - res = 1; } spin_unlock_bh(&seq->lock); exit: rcu_read_unlock(); - return res; } /* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes @@ -700,7 +653,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, stop = seq->sseqs + seq->first_free; for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; - list_for_each_entry(publ, &info->zone_list, zone_list) { + list_for_each_entry(publ, &info->all_publ, all_publ) { tipc_nlist_add(nodes, publ->node); } } @@ -729,10 +682,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, stop = seq->sseqs + seq->first_free; for (; sseq != stop; sseq++) { info = sseq->info; - list_for_each_entry(p, &info->zone_list, zone_list) { + list_for_each_entry(p, &info->all_publ, all_publ) { if (p->scope != scope) continue; - tipc_group_add_member(grp, p->node, p->ref, p->lower); + tipc_group_add_member(grp, p->node, p->port, p->lower); } } spin_unlock_bh(&seq->lock); @@ -760,7 +713,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, } publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope, - tn->own_addr, port_ref, key); + tipc_own_addr(net), port_ref, key); if (likely(publ)) { tn->nametbl->local_publ_count++; buf = tipc_named_publish(net, publ); @@ -777,7 +730,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, /** * tipc_nametbl_withdraw - withdraw name publication from network name tables */ -int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port, u32 key) { struct publication *publ; @@ -785,19 +738,19 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, struct tipc_net *tn = net_generic(net, tipc_net_id); spin_lock_bh(&tn->nametbl_lock); - publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr, - ref, key); + publ = tipc_nametbl_remove_publ(net, type, lower, tipc_own_addr(net), + port, key); if (likely(publ)) { tn->nametbl->local_publ_count--; skb = tipc_named_withdraw(net, publ); /* Any pending external events? 
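 * Updates from peer nodes that could not be applied on arrival sit in
 * a deferred queue; flush that backlog while the table lock is held.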
*/ tipc_named_process_backlog(net); - list_del_init(&publ->pport_list); + list_del_init(&publ->binding_sock); kfree_rcu(publ, rcu); } else { pr_err("Unable to remove local publication\n" - "(type=%u, lower=%u, ref=%u, key=%u)\n", - type, lower, ref, key); + "(type=%u, lower=%u, port=%u, key=%u)\n", + type, lower, port, key); } spin_unlock_bh(&tn->nametbl_lock); @@ -879,9 +832,8 @@ int tipc_nametbl_init(struct net *net) for (i = 0; i < TIPC_NAMETBL_SIZE; i++) INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]); - INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); - INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]); + INIT_LIST_HEAD(&tipc_nametbl->node_scope); + INIT_LIST_HEAD(&tipc_nametbl->cluster_scope); tn->nametbl = tipc_nametbl; spin_lock_init(&tn->nametbl_lock); return 0; @@ -901,9 +853,9 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq) spin_lock_bh(&seq->lock); sseq = seq->sseqs; info = sseq->info; - list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { + list_for_each_entry_safe(publ, safe, &info->all_publ, all_publ) { tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node, - publ->ref, publ->key); + publ->port, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); @@ -950,17 +902,17 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, struct publication *p; if (*last_publ) { - list_for_each_entry(p, &sseq->info->zone_list, zone_list) + list_for_each_entry(p, &sseq->info->all_publ, all_publ) if (p->key == *last_publ) break; if (p->key != *last_publ) return -EPIPE; } else { - p = list_first_entry(&sseq->info->zone_list, struct publication, - zone_list); + p = list_first_entry(&sseq->info->all_publ, struct publication, + all_publ); } - list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) { + list_for_each_entry_from(p, &sseq->info->all_publ, all_publ) { *last_publ = p->key; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, @@ -987,7 +939,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node)) goto publ_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref)) + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port)) goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key)) goto publ_msg_full; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 17652602d5e2..34a4ccb907aa 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -1,7 +1,7 @@ /* * net/tipc/name_table.h: Include file for TIPC name table code * - * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. 
* @@ -54,19 +54,22 @@ struct tipc_group; * @type: name sequence type * @lower: name sequence lower bound * @upper: name sequence upper bound - * @scope: scope of publication - * @node: network address of publishing port's node - * @ref: publishing port - * @key: publication key - * @nodesub_list: subscription to "node down" event (off-node publication only) - * @local_list: adjacent entries in list of publications made by this node - * @pport_list: adjacent entries in list of publications made by this port - * @node_list: adjacent matching name seq publications with >= node scope - * @cluster_list: adjacent matching name seq publications with >= cluster scope - * @zone_list: adjacent matching name seq publications with >= zone scope + * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE + * @node: network address of publishing socket's node + * @port: publishing port + * @key: publication key, unique across the cluster + * @binding_node: all publications from the same node which bound this one + * - Remote publications: in node->publ_list + * Used by node/name distr to withdraw publications when node is lost + * - Local/node scope publications: in name_table->node_scope list + * - Local/cluster scope publications: in name_table->cluster_scope list + * @binding_sock: all publications from the same socket which bound this one + * Used by socket to withdraw publications when socket is unbound/released + * @local_publ: list of identical publications made from this node + * Used by closest_first and multicast receive lookup algorithms + * @all_publ: all publications identical to this one, whatever node and scope + * Used by round-robin lookup algorithm * @rcu: RCU callback head used for deferred freeing - * - * Note that the node list, cluster list, and zone list are circular lists. 
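
The Round-Robin branch of tipc_nametbl_translate() above rotates the chosen publication to the tail of all_publ with list_move_tail(), so repeated lookups cycle through every matching publication. A self-contained model of that rotation (plain C with a minimal doubly-linked list standing in for <linux/list.h>; struct pub is illustrative only):

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *prev, *next; };

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

static void list_del(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* list_move_tail(): the primitive behind the round-robin walk */
static void list_move_tail(struct list_head *n, struct list_head *h)
{
	list_del(n);
	list_add_tail(n, h);
}

struct pub {
	int port;
	struct list_head all_publ;
};

int main(void)
{
	struct list_head all = { &all, &all };
	struct pub p[3] = { { .port = 100 }, { .port = 101 }, { .port = 102 } };
	int i;

	for (i = 0; i < 3; i++)
		list_add_tail(&p[i].all_publ, &all);

	/* Successive translations yield ports 100, 101, 102, 100, ... */
	for (i = 0; i < 4; i++) {
		struct pub *first = (struct pub *)
			((char *)all.next - offsetof(struct pub, all_publ));

		printf("lookup %d -> port %d\n", i, first->port);
		list_move_tail(&first->all_publ, &all);
	}
	return 0;
}

Rotating the hit to the tail is what makes the next lookup see a different publication, giving round-robin distribution without any extra cursor state.
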
*/ struct publication { @@ -74,34 +77,37 @@ struct publication { u32 upper; u32 scope; u32 node; - u32 ref; + u32 port; u32 key; - struct list_head nodesub_list; - struct list_head local_list; - struct list_head pport_list; - struct list_head node_list; - struct list_head cluster_list; - struct list_head zone_list; + struct list_head binding_node; + struct list_head binding_sock; + struct list_head local_publ; + struct list_head all_publ; struct rcu_head rcu; }; /** * struct name_table - table containing all existing port name publications * @seq_hlist: name sequence hash lists - * @publ_list: publication lists + * @node_scope: all local publications with node scope + * - used by name_distr during re-init of name table + * @cluster_scope: all local publications with cluster scope + * - used by name_distr to send bulk updates to new nodes + * - used by name_distr during re-init of name table * @local_publ_count: number of publications issued by this node */ struct name_table { struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE]; - struct list_head publ_list[TIPC_PUBL_SCOPE_NUM]; + struct list_head node_scope; + struct list_head cluster_scope; u32 local_publ_count; }; int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports); +void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports); void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, diff --git a/net/tipc/net.c b/net/tipc/net.c index 1a2fde0d6f61..29538dc00857 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -104,38 +104,39 @@ * - A local spin_lock protecting the queue of subscriber events. */ -int tipc_net_start(struct net *net, u32 addr) +int tipc_net_init(struct net *net, u8 *node_id, u32 addr) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - char addr_string[16]; + if (tipc_own_id(net)) { + pr_info("Cannot configure node identity twice\n"); + return -1; + } + pr_info("Started in network mode\n"); - tn->own_addr = addr; + if (node_id) + tipc_set_node_id(net, node_id); + if (addr) + tipc_net_finalize(net, addr); + return 0; +} - /* Ensure that the new address is visible before we reinit.
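 * (The smp_mb() below orders the address store in tipc_set_node_addr()
 * against the socket and name table re-initialization that follows.)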
*/ +void tipc_net_finalize(struct net *net, u32 addr) +{ + tipc_set_node_addr(net, addr); smp_mb(); - tipc_named_reinit(net); tipc_sk_reinit(net); - - tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, - TIPC_ZONE_SCOPE, 0, tn->own_addr); - - pr_info("Started in network mode\n"); - pr_info("Own node address %s, network identity %u\n", - tipc_addr_string_fill(addr_string, tn->own_addr), - tn->net_id); - return 0; + tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, + TIPC_CLUSTER_SCOPE, 0, addr); } void tipc_net_stop(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 self = tipc_own_addr(net); - if (!tn->own_addr) + if (!self) return; - tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0, - tn->own_addr); + tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, 0, self); rtnl_lock(); tipc_bearer_stop(net); tipc_node_stop(net); @@ -147,8 +148,10 @@ void tipc_net_stop(struct net *net) static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) { struct tipc_net *tn = net_generic(net, tipc_net_id); - void *hdr; + u64 *w0 = (u64 *)&tn->node_id[0]; + u64 *w1 = (u64 *)&tn->node_id[8]; struct nlattr *attrs; + void *hdr; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NET_GET); @@ -161,7 +164,10 @@ static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id)) goto attr_msg_full; - + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID, *w0, 0)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID_W1, *w1, 0)) + goto attr_msg_full; nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -202,9 +208,9 @@ out: int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) { - struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); int err; if (!info->attrs[TIPC_NLA_NET]) @@ -213,16 +219,17 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, info->extack); + if (err) return err; + /* Can't change net id once TIPC has joined a network */ + if (tipc_own_addr(net)) + return -EPERM; + if (attrs[TIPC_NLA_NET_ID]) { u32 val; - /* Can't change net id once TIPC has joined a network */ - if (tn->own_addr) - return -EPERM; - val = nla_get_u32(attrs[TIPC_NLA_NET_ID]); if (val < 1 || val > 9999) return -EINVAL; @@ -233,17 +240,22 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) if (attrs[TIPC_NLA_NET_ADDR]) { u32 addr; - /* Can't change net addr once TIPC has joined a network */ - if (tn->own_addr) - return -EPERM; - addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); - if (!tipc_addr_node_valid(addr)) + if (!addr) return -EINVAL; - - tipc_net_start(net, addr); + tn->legacy_addr_format = true; + tipc_net_init(net, NULL, addr); } + if (attrs[TIPC_NLA_NET_NODEID]) { + u8 node_id[NODE_ID_LEN]; + u64 *w0 = (u64 *)&node_id[0]; + u64 *w1 = (u64 *)&node_id[8]; + + *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]); + *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]); + tipc_net_init(net, node_id, 0); + } return 0; } diff --git a/net/tipc/net.h b/net/tipc/net.h index c0306aa2374b..09ad02b50bb1 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -41,10 +41,9 @@ extern const struct nla_policy tipc_nl_net_policy[]; -int tipc_net_start(struct net *net, u32 
addr); - +int tipc_net_init(struct net *net, u8 *node_id, u32 addr); +void tipc_net_finalize(struct net *net, u32 addr); void tipc_net_stop(struct net *net); - int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/node.c b/net/tipc/node.c index 389193d7cf67..4fb4327311bb 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -115,6 +115,7 @@ struct tipc_node { u16 capabilities; u32 signature; u32 link_id; + u8 peer_id[16]; struct list_head publ_list; struct list_head conn_sks; unsigned long keepalive_intv; @@ -156,6 +157,7 @@ static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(struct timer_list *t); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); static struct tipc_node *tipc_node_find(struct net *net, u32 addr); +static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id); static void tipc_node_put(struct tipc_node *node); static bool node_is_up(struct tipc_node *n); @@ -233,9 +235,6 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr) struct tipc_node *node; unsigned int thash = tipc_hashfn(addr); - if (unlikely(!in_own_cluster_exact(net, addr))) - return NULL; - rcu_read_lock(); hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) { if (node->addr != addr) @@ -248,6 +247,30 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr) return node; } +/* tipc_node_find_by_id - locate specified node object by its 128-bit id + * Note: this function is called only when a discovery request failed + * to find the node by its 32-bit id, and is not time critical + */ +static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_node *n; + bool found = false; + + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) { + read_lock_bh(&n->lock); + if (!memcmp(id, n->peer_id, 16) && + kref_get_unless_zero(&n->kref)) + found = true; + read_unlock_bh(&n->lock); + if (found) + break; + } + rcu_read_unlock(); + return found ? 
n : NULL; +} + static void tipc_node_read_lock(struct tipc_node *n) { read_lock_bh(&n->lock); @@ -310,7 +333,8 @@ static void tipc_node_write_unlock(struct tipc_node *n) } } -struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) +static struct tipc_node *tipc_node_create(struct net *net, u32 addr, + u8 *peer_id, u16 capabilities) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n, *temp_node; @@ -329,6 +353,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) goto exit; } n->addr = addr; + memcpy(&n->peer_id, peer_id, 16); n->net = net; n->capabilities = capabilities; kref_init(&n->kref); @@ -347,8 +372,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; - if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr, - U16_MAX, + if (!tipc_link_bc_create(net, tipc_own_addr(net), + addr, U16_MAX, tipc_link_window(tipc_bc_sndlink(net)), n->capabilities, &n->bc_entry.inputq1, @@ -738,8 +763,51 @@ bool tipc_node_is_up(struct net *net, u32 addr) return retval; } -void tipc_node_check_dest(struct net *net, u32 onode, - struct tipc_bearer *b, +static u32 tipc_node_suggest_addr(struct net *net, u32 addr) +{ + struct tipc_node *n; + + addr ^= tipc_net(net)->random; + while ((n = tipc_node_find(net, addr))) { + tipc_node_put(n); + addr++; + } + return addr; +} + +/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not + */ +u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_node *n; + + /* Suggest new address if some other peer is using this one */ + n = tipc_node_find(net, addr); + if (n) { + if (!memcmp(n->peer_id, id, NODE_ID_LEN)) + addr = 0; + tipc_node_put(n); + if (!addr) + return 0; + return tipc_node_suggest_addr(net, addr); + } + + /* Suggest previously used address if peer is known */ + n = tipc_node_find_by_id(net, id); + if (n) { + addr = n->addr; + tipc_node_put(n); + } + /* Even this node may be in trial phase */ + if (tn->trial_addr == addr) + return tipc_node_suggest_addr(net, addr); + + return addr; +} + +void tipc_node_check_dest(struct net *net, u32 addr, + u8 *peer_id, struct tipc_bearer *b, u16 capabilities, u32 signature, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr) @@ -758,7 +826,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, *dupl_addr = false; *respond = false; - n = tipc_node_create(net, onode, capabilities); + n = tipc_node_create(net, addr, peer_id, capabilities); if (!n) return; @@ -836,15 +904,14 @@ void tipc_node_check_dest(struct net *net, u32 onode, /* Now create new link if not already existing */ if (!l) { - if (n->link_cnt == 2) { - pr_warn("Cannot establish 3rd link to %x\n", n->addr); + if (n->link_cnt == 2) goto exit; - } + if_name = strchr(b->name, ':') + 1; if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, b->window, mod(tipc_net(net)->random), - tipc_own_addr(net), onode, + tipc_own_addr(net), addr, peer_id, n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, &le->inputq, @@ -887,11 +954,9 @@ void tipc_node_delete_links(struct net *net, int bearer_id) static void tipc_node_reset_links(struct tipc_node *n) { - char addr_string[16]; int i; - pr_warn("Resetting all links to %s\n", - tipc_addr_string_fill(addr_string, n->addr)); + pr_warn("Resetting all links to %x\n", 
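
/* Illustration only (not part of this commit): a user-space model of the
 * trial-address walk performed by tipc_node_suggest_addr() above. The
 * candidate address is perturbed by the node's random value and then
 * linearly probed until it no longer collides with a known node.
 * addr_in_use() is a hypothetical stand-in for tipc_node_find().
 */
#include <stdbool.h>
#include <stdint.h>

static bool addr_in_use(uint32_t addr);

static uint32_t suggest_addr(uint32_t addr, uint32_t node_random)
{
	addr ^= node_random;		/* same perturbation as the kernel code */
	while (addr_in_use(addr))	/* probe upward to a free 32-bit address */
		addr++;
	return addr;
}
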
n->addr); for (i = 0; i < MAX_BEARERS; i++) { tipc_node_link_down(n, i, false); @@ -1078,15 +1143,13 @@ illegal_evt: static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { - char addr_string[16]; struct tipc_sock_conn *conn, *safe; struct tipc_link *l; struct list_head *conns = &n->conn_sks; struct sk_buff *skb; uint i; - pr_debug("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n->addr)); + pr_debug("Lost contact with %x\n", n->addr); /* Clean up broadcast state */ tipc_bcast_remove_peer(n->net, n->bc_entry.link); diff --git a/net/tipc/node.h b/net/tipc/node.h index 4ce5e3a185c0..f24b83500df1 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -49,17 +49,19 @@ enum { TIPC_BCAST_STATE_NACK = (1 << 2), TIPC_BLOCK_FLOWCTL = (1 << 3), TIPC_BCAST_RCAST = (1 << 4), - TIPC_MCAST_GROUPS = (1 << 5) + TIPC_NODE_ID128 = (1 << 5) }; #define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ TIPC_BCAST_STATE_NACK | \ TIPC_BCAST_RCAST | \ - TIPC_BLOCK_FLOWCTL) + TIPC_BLOCK_FLOWCTL | \ + TIPC_NODE_ID128) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); -void tipc_node_check_dest(struct net *net, u32 onode, +u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr); +void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128, struct tipc_bearer *bearer, u16 capabilities, u32 signature, struct tipc_media_addr *maddr, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f93477187a90..275b666f6231 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -289,10 +289,9 @@ static bool tipc_sk_type_connectionless(struct sock *sk) static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { struct sock *sk = &tsk->sk; - struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); + u32 self = tipc_own_addr(sock_net(sk)); u32 peer_port = tsk_peer_port(tsk); - u32 orig_node; - u32 peer_node; + u32 orig_node, peer_node; if (unlikely(!tipc_sk_connected(sk))) return false; @@ -306,10 +305,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) if (likely(orig_node == peer_node)) return true; - if (!orig_node && (peer_node == tn->own_addr)) + if (!orig_node && peer_node == self) return true; - if (!peer_node && (orig_node == tn->own_addr)) + if (!peer_node && orig_node == self) return true; return false; @@ -461,8 +460,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, /* Ensure tsk is visible before we read own_addr. */ smp_mb(); - tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, - NAMED_H_SIZE, 0); + tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE, + TIPC_NAMED_MSG, NAMED_H_SIZE, 0); msg_set_origport(msg, tsk->portid); timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); @@ -473,6 +472,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_write_space = tipc_write_space; sk->sk_destruct = tipc_sock_destruct; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->group_is_open = true; atomic_set(&tsk->dupl_rcvcnt, 0); /* Start out with safe limits until we receive an advertised window */ @@ -643,7 +643,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, goto exit; } - res = (addr->scope > 0) ? + res = (addr->scope >= 0) ? 
tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); exit: @@ -670,7 +670,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { @@ -681,7 +680,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.node = tsk_peer_node(tsk); } else { addr->addr.id.ref = tsk->portid; - addr->addr.id.node = tn->own_addr; + addr->addr.id.node = tipc_own_addr(sock_net(sk)); } addr->addrtype = TIPC_ADDR_ID; @@ -1279,8 +1278,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq *seq; struct sk_buff_head pkts; - u32 type, inst, domain; u32 dnode, dport; + u32 type, inst; int mtu, rc; if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) @@ -1331,13 +1330,12 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (dest->addrtype == TIPC_ADDR_NAME) { type = dest->addr.name.name.type; inst = dest->addr.name.name.instance; - domain = dest->addr.name.domain; - dnode = domain; + dnode = dest->addr.name.domain; msg_set_type(hdr, TIPC_NAMED_MSG); msg_set_hdr_sz(hdr, NAMED_H_SIZE); msg_set_nametype(hdr, type); msg_set_nameinst(hdr, inst); - msg_set_lookup_scope(hdr, tipc_addr_scope(domain)); + msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(hdr, dnode); msg_set_destport(hdr, dport); @@ -2122,8 +2120,10 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, (!sk_conn && msg_connected(hdr)) || (!grp && msg_in_group(hdr))) err = TIPC_ERR_NO_PORT; - else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) + else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) { + atomic_inc(&sk->sk_drops); err = TIPC_ERR_OVERLOAD; + } if (unlikely(err)) { tipc_skb_reject(net, err, skb, xmitq); @@ -2202,6 +2202,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Overload => reject message back to sender */ onode = tipc_own_addr(sock_net(sk)); + atomic_inc(&sk->sk_drops); if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) __skb_queue_tail(xmitq, skb); break; @@ -2591,6 +2592,9 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct publication *publ; u32 key; + if (scope != TIPC_NODE_SCOPE) + scope = TIPC_CLUSTER_SCOPE; + if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; @@ -2602,7 +2606,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, if (unlikely(!publ)) return -EINVAL; - list_add(&publ->pport_list, &tsk->publications); + list_add(&publ->binding_sock, &tsk->publications); tsk->pub_count++; tsk->published = 1; return 0; @@ -2616,7 +2620,10 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct publication *safe; int rc = -EINVAL; - list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) { + if (scope != TIPC_NODE_SCOPE) + scope = TIPC_CLUSTER_SCOPE; + + list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) { if (seq) { if (publ->scope != scope) continue; @@ -2627,12 +2634,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, if (publ->upper != seq->upper) break; tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->ref, publ->key); 
+ publ->port, publ->key); rc = 0; break; } tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->ref, publ->key); + publ->port, publ->key); rc = 0; } if (list_empty(&tsk->publications)) @@ -2658,8 +2665,8 @@ void tipc_sk_reinit(struct net *net) while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); msg = &tsk->phdr; - msg_set_prevnode(msg, tn->own_addr); - msg_set_orignode(msg, tn->own_addr); + msg_set_prevnode(msg, tipc_own_addr(net)); + msg_set_orignode(msg, tipc_own_addr(net)); spin_unlock_bh(&tsk->sk.sk_lock.slock); } @@ -3154,16 +3161,32 @@ msg_full: return -EMSGSIZE; } +static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock + *tsk) +{ + struct net *net = sock_net(skb->sk); + struct sock *sk = &tsk->sk; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) || + nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net))) + return -EMSGSIZE; + + if (tipc_sk_connected(sk)) { + if (__tipc_nl_add_sk_con(skb, tsk)) + return -EMSGSIZE; + } else if (!list_empty(&tsk->publications)) { + if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) + return -EMSGSIZE; + } + return 0; +} + /* Caller should hold socket lock for the passed tipc socket. */ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_sock *tsk) { - int err; - void *hdr; struct nlattr *attrs; - struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sock *sk = &tsk->sk; + void *hdr; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); @@ -3173,19 +3196,10 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, attrs = nla_nest_start(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) - goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) + + if (__tipc_nl_add_sk_info(skb, tsk)) goto attr_msg_cancel; - if (tipc_sk_connected(sk)) { - err = __tipc_nl_add_sk_con(skb, tsk); - if (err) - goto attr_msg_cancel; - } else if (!list_empty(&tsk->publications)) { - if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) - goto attr_msg_cancel; - } nla_nest_end(skb, attrs); genlmsg_end(skb, hdr); @@ -3199,16 +3213,19 @@ msg_cancel: return -EMSGSIZE; } -int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) +int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, + int (*skb_handler)(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk)) { - int err; - struct tipc_sock *tsk; - const struct bucket_table *tbl; - struct rhash_head *pos; struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 tbl_id = cb->args[0]; + struct tipc_net *tn = tipc_net(net); + const struct bucket_table *tbl; u32 prev_portid = cb->args[1]; + u32 tbl_id = cb->args[0]; + struct rhash_head *pos; + struct tipc_sock *tsk; + int err; rcu_read_lock(); tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); @@ -3220,12 +3237,13 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - err = __tipc_nl_add_sk(skb, cb, tsk); + err = skb_handler(skb, cb, tsk); if (err) { prev_portid = tsk->portid; spin_unlock_bh(&tsk->sk.sk_lock.slock); goto out; } + prev_portid = 0; spin_unlock_bh(&tsk->sk.sk_lock.slock); } @@ -3237,6 +3255,75 @@ out: return skb->len; } +EXPORT_SYMBOL(tipc_nl_sk_walk); + +int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock 
*tsk, + u32 sk_filter_state, + u64 (*tipc_diag_gen_cookie)(struct sock *sk)) +{ + struct sock *sk = &tsk->sk; + struct nlattr *attrs; + struct nlattr *stat; + + /*filter response w.r.t sk_state*/ + if (!(sk_filter_state & (1 << sk->sk_state))) + return 0; + + attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + if (!attrs) + goto msg_cancel; + + if (__tipc_nl_add_sk_info(skb, tsk)) + goto attr_msg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) || + nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) || + nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) || + nla_put_u32(skb, TIPC_NLA_SOCK_UID, + from_kuid_munged(sk_user_ns(sk), sock_i_uid(sk))) || + nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE, + tipc_diag_gen_cookie(sk), + TIPC_NLA_SOCK_PAD)) + goto attr_msg_cancel; + + stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT); + if (!stat) + goto attr_msg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ, + skb_queue_len(&sk->sk_receive_queue)) || + nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ, + skb_queue_len(&sk->sk_write_queue)) || + nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP, + atomic_read(&sk->sk_drops))) + goto stat_msg_cancel; + + if (tsk->cong_link_cnt && + nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG)) + goto stat_msg_cancel; + + if (tsk_conn_cong(tsk) && + nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG)) + goto stat_msg_cancel; + + nla_nest_end(skb, stat); + nla_nest_end(skb, attrs); + + return 0; + +stat_msg_cancel: + nla_nest_cancel(skb, stat); +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +msg_cancel: + return -EMSGSIZE; +} +EXPORT_SYMBOL(tipc_sk_fill_sock_diag); + +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk); +} /* Caller should hold socket lock for the passed tipc socket. 
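 * tipc_nl_sk_walk() takes sk_lock.slock on each socket before invoking its
 * skb_handler callback, so dump helpers such as __tipc_nl_add_sk() and the
 * publication dumper below are always entered with that lock held.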
*/ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, @@ -3286,7 +3373,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, struct publication *p; if (*last_publ) { - list_for_each_entry(p, &tsk->publications, pport_list) { + list_for_each_entry(p, &tsk->publications, binding_sock) { if (p->key == *last_publ) break; } @@ -3303,10 +3390,10 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, } } else { p = list_first_entry(&tsk->publications, struct publication, - pport_list); + binding_sock); } - list_for_each_entry_from(p, &tsk->publications, pport_list) { + list_for_each_entry_from(p, &tsk->publications, binding_sock) { err = __tipc_nl_add_sk_publ(skb, cb, p); if (err) { *last_publ = p->key; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 06fb5944cf76..aae3fd4cd06c 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -49,6 +49,8 @@ #define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2) #define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16) +struct tipc_sock; + int tipc_socket_init(void); void tipc_socket_stop(void); void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); @@ -59,5 +61,11 @@ int tipc_sk_rht_init(struct net *net); void tipc_sk_rht_destroy(struct net *net); int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); - +int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk, + u32 sk_filter_state, + u64 (*tipc_diag_gen_cookie)(struct sock *sk)); +int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, + int (*skb_handler)(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk)); #endif diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 3deabcab4882..e7d91f5d5cae 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -47,6 +47,8 @@ #include <net/addrconf.h> #include <linux/tipc_netlink.h> #include "core.h" +#include "addr.h" +#include "net.h" #include "bearer.h" #include "netlink.h" #include "msg.h" @@ -647,6 +649,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, struct udp_port_cfg udp_conf = {0}; struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + u8 node_id[NODE_ID_LEN] = {0,}; ub = kzalloc(sizeof(*ub), GFP_ATOMIC); if (!ub) @@ -677,6 +680,17 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (err) goto err; + /* Autoconfigure own node identity if needed */ + if (!tipc_own_id(net)) { + memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16); + tipc_net_init(net, node_id, 0); + } + if (!tipc_own_id(net)) { + pr_warn("Failed to set node id, please configure manually\n"); + err = -EINVAL; + goto err; + } + b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; rcu_assign_pointer(b->media_ptr, ub); diff --git a/net/tls/Kconfig b/net/tls/Kconfig index eb583038c67e..89b8745a986f 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -7,6 +7,7 @@ config TLS select CRYPTO select CRYPTO_AES select CRYPTO_GCM + select STREAM_PARSER default n ---help--- Enable kernel support for TLS protocol. 
This allows symmetric diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index e9b4b53ab53e..6f5c1146da4a 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -46,16 +46,29 @@ MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); enum { - TLS_BASE_TX, + TLSV4, + TLSV6, + TLS_NUM_PROTS, +}; + +enum { + TLS_BASE, TLS_SW_TX, + TLS_SW_RX, + TLS_SW_RXTX, TLS_NUM_CONFIG, }; -static struct proto tls_prots[TLS_NUM_CONFIG]; +static struct proto *saved_tcpv6_prot; +static DEFINE_MUTEX(tcpv6_prot_mutex); +static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG]; +static struct proto_ops tls_sw_proto_ops; static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) { - sk->sk_prot = &tls_prots[ctx->tx_conf]; + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; + + sk->sk_prot = &tls_prots[ip_ver][ctx->conf]; } int wait_on_pending_writer(struct sock *sk, long *timeo) @@ -228,7 +241,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) lock_sock(sk); sk_proto_close = ctx->sk_proto_close; - if (ctx->tx_conf == TLS_BASE_TX) { + if (ctx->conf == TLS_BASE) { kfree(ctx); goto skip_tx_cleanup; } @@ -249,11 +262,16 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) } } - kfree(ctx->rec_seq); - kfree(ctx->iv); + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + kfree(ctx->rx.rec_seq); + kfree(ctx->rx.iv); - if (ctx->tx_conf == TLS_SW_TX) - tls_sw_free_tx_resources(sk); + if (ctx->conf == TLS_SW_TX || + ctx->conf == TLS_SW_RX || + ctx->conf == TLS_SW_RXTX) { + tls_sw_free_resources(sk); + } skip_tx_cleanup: release_sock(sk); @@ -309,9 +327,9 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval, } lock_sock(sk); memcpy(crypto_info_aes_gcm_128->iv, - ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, TLS_CIPHER_AES_GCM_128_IV_SIZE); - memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->rec_seq, + memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->tx.rec_seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); release_sock(sk); if (copy_to_user(optval, @@ -355,20 +373,24 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, return do_tls_getsockopt(sk, optname, optval, optlen); } -static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, - unsigned int optlen) +static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, + unsigned int optlen, int tx) { struct tls_crypto_info *crypto_info; struct tls_context *ctx = tls_get_ctx(sk); int rc = 0; - int tx_conf; + int conf; if (!optval || (optlen < sizeof(*crypto_info))) { rc = -EINVAL; goto out; } - crypto_info = &ctx->crypto_send; + if (tx) + crypto_info = &ctx->crypto_send; + else + crypto_info = &ctx->crypto_recv; + /* Currently we don't support set crypto info more than one time */ if (TLS_CRYPTO_INFO_READY(crypto_info)) { rc = -EBUSY; @@ -407,15 +429,31 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, } /* currently SW is default, we will have ethtool in future */ - rc = tls_set_sw_offload(sk, ctx); - tx_conf = TLS_SW_TX; + if (tx) { + rc = tls_set_sw_offload(sk, ctx, 1); + if (ctx->conf == TLS_SW_RX) + conf = TLS_SW_RXTX; + else + conf = TLS_SW_TX; + } else { + rc = tls_set_sw_offload(sk, ctx, 0); + if (ctx->conf == TLS_SW_TX) + conf = TLS_SW_RXTX; + else + conf = TLS_SW_RX; + } + if (rc) goto err_crypto_info; - ctx->tx_conf = tx_conf; + ctx->conf = conf; update_sk_prot(sk, ctx); - ctx->sk_write_space = sk->sk_write_space; - sk->sk_write_space = tls_write_space; + if (tx) { + 
ctx->sk_write_space = sk->sk_write_space; + sk->sk_write_space = tls_write_space; + } else { + sk->sk_socket->ops = &tls_sw_proto_ops; + } goto out; err_crypto_info: @@ -431,8 +469,10 @@ static int do_tls_setsockopt(struct sock *sk, int optname, switch (optname) { case TLS_TX: + case TLS_RX: lock_sock(sk); - rc = do_tls_setsockopt_tx(sk, optval, optlen); + rc = do_tls_setsockopt_conf(sk, optval, optlen, + optname == TLS_TX); release_sock(sk); break; default: @@ -453,8 +493,29 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } +static void build_protos(struct proto *prot, struct proto *base) +{ + prot[TLS_BASE] = *base; + prot[TLS_BASE].setsockopt = tls_setsockopt; + prot[TLS_BASE].getsockopt = tls_getsockopt; + prot[TLS_BASE].close = tls_sk_proto_close; + + prot[TLS_SW_TX] = prot[TLS_BASE]; + prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; + prot[TLS_SW_TX].sendpage = tls_sw_sendpage; + + prot[TLS_SW_RX] = prot[TLS_BASE]; + prot[TLS_SW_RX].recvmsg = tls_sw_recvmsg; + prot[TLS_SW_RX].close = tls_sk_proto_close; + + prot[TLS_SW_RXTX] = prot[TLS_SW_TX]; + prot[TLS_SW_RXTX].recvmsg = tls_sw_recvmsg; + prot[TLS_SW_RXTX].close = tls_sk_proto_close; +} + static int tls_init(struct sock *sk) { + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx; int rc = 0; @@ -479,7 +540,18 @@ static int tls_init(struct sock *sk) ctx->getsockopt = sk->sk_prot->getsockopt; ctx->sk_proto_close = sk->sk_prot->close; - ctx->tx_conf = TLS_BASE_TX; + /* Build IPv6 TLS whenever the address of tcpv6_prot changes */ + if (ip_ver == TLSV6 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { + mutex_lock(&tcpv6_prot_mutex); + if (likely(sk->sk_prot != saved_tcpv6_prot)) { + build_protos(tls_prots[TLSV6], sk->sk_prot); + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); + } + mutex_unlock(&tcpv6_prot_mutex); + } + + ctx->conf = TLS_BASE; update_sk_prot(sk, ctx); out: return rc; @@ -493,21 +565,13 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .init = tls_init, }; -static void build_protos(struct proto *prot, struct proto *base) -{ - prot[TLS_BASE_TX] = *base; - prot[TLS_BASE_TX].setsockopt = tls_setsockopt; - prot[TLS_BASE_TX].getsockopt = tls_getsockopt; - prot[TLS_BASE_TX].close = tls_sk_proto_close; - - prot[TLS_SW_TX] = prot[TLS_BASE_TX]; - prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; - prot[TLS_SW_TX].sendpage = tls_sw_sendpage; -} - static int __init tls_register(void) { - build_protos(tls_prots, &tcp_prot); + build_protos(tls_prots[TLSV4], &tcp_prot); + + tls_sw_proto_ops = inet_stream_ops; + tls_sw_proto_ops.poll = tls_sw_poll; + tls_sw_proto_ops.splice_read = tls_sw_splice_read; tcp_register_ulp(&tcp_tls_ulp_ops); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f26376e954ae..4dc766b03f00 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -34,11 +34,60 @@ * SOFTWARE. 
*/ +#include <linux/sched/signal.h> #include <linux/module.h> #include <crypto/aead.h> +#include <net/strparser.h> #include <net/tls.h> +static int tls_do_decryption(struct sock *sk, + struct scatterlist *sgin, + struct scatterlist *sgout, + char *iv_recv, + size_t data_len, + struct sk_buff *skb, + gfp_t flags) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct strp_msg *rxm = strp_msg(skb); + struct aead_request *aead_req; + + int ret; + unsigned int req_size = sizeof(struct aead_request) + + crypto_aead_reqsize(ctx->aead_recv); + + aead_req = kzalloc(req_size, flags); + if (!aead_req) + return -ENOMEM; + + aead_request_set_tfm(aead_req, ctx->aead_recv); + aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); + aead_request_set_crypt(aead_req, sgin, sgout, + data_len + tls_ctx->rx.tag_size, + (u8 *)iv_recv); + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &ctx->async_wait); + + ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait); + + if (ret < 0) + goto out; + + rxm->offset += tls_ctx->rx.prepend_size; + rxm->full_len -= tls_ctx->rx.overhead_size; + tls_advance_record_sn(sk, &tls_ctx->rx); + + ctx->decrypted = true; + + ctx->saved_data_ready(sk); + +out: + kfree(aead_req); + return ret; +} + static void trim_sg(struct sock *sk, struct scatterlist *sg, int *sg_num_elem, unsigned int *sg_size, int target_size) { @@ -79,7 +128,7 @@ static void trim_both_sgl(struct sock *sk, int target_size) target_size); if (target_size > 0) - target_size += tls_ctx->overhead_size; + target_size += tls_ctx->tx.overhead_size; trim_sg(sk, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem, @@ -87,71 +136,16 @@ static void trim_both_sgl(struct sock *sk, int target_size) target_size); } -static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg, - int *sg_num_elem, unsigned int *sg_size, - int first_coalesce) -{ - struct page_frag *pfrag; - unsigned int size = *sg_size; - int num_elem = *sg_num_elem, use = 0, rc = 0; - struct scatterlist *sge; - unsigned int orig_offset; - - len -= size; - pfrag = sk_page_frag(sk); - - while (len > 0) { - if (!sk_page_frag_refill(sk, pfrag)) { - rc = -ENOMEM; - goto out; - } - - use = min_t(int, len, pfrag->size - pfrag->offset); - - if (!sk_wmem_schedule(sk, use)) { - rc = -ENOMEM; - goto out; - } - - sk_mem_charge(sk, use); - size += use; - orig_offset = pfrag->offset; - pfrag->offset += use; - - sge = sg + num_elem - 1; - if (num_elem > first_coalesce && sg_page(sg) == pfrag->page && - sg->offset + sg->length == orig_offset) { - sg->length += use; - } else { - sge++; - sg_unmark_end(sge); - sg_set_page(sge, pfrag->page, use, orig_offset); - get_page(pfrag->page); - ++num_elem; - if (num_elem == MAX_SKB_FRAGS) { - rc = -ENOSPC; - break; - } - } - - len -= use; - } - goto out; - -out: - *sg_size = size; - *sg_num_elem = num_elem; - return rc; -} - static int alloc_encrypted_sg(struct sock *sk, int len) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); int rc = 0; - rc = alloc_sg(sk, len, ctx->sg_encrypted_data, - &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0); + rc = sk_alloc_sg(sk, len, + ctx->sg_encrypted_data, 0, + &ctx->sg_encrypted_num_elem, + &ctx->sg_encrypted_size, 0); return rc; } @@ -162,9 +156,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len) struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); int rc = 0; - rc = alloc_sg(sk, len, ctx->sg_plaintext_data, - 
&ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size, - tls_ctx->pending_open_record_frags); + rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0, + &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size, + tls_ctx->pending_open_record_frags); return rc; } @@ -207,21 +201,21 @@ static int tls_do_encryption(struct tls_context *tls_ctx, if (!aead_req) return -ENOMEM; - ctx->sg_encrypted_data[0].offset += tls_ctx->prepend_size; - ctx->sg_encrypted_data[0].length -= tls_ctx->prepend_size; + ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size; + ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size; aead_request_set_tfm(aead_req, ctx->aead_send); aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out, - data_len, tls_ctx->iv); + data_len, tls_ctx->tx.iv); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &ctx->async_wait); rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait); - ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size; - ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size; + ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size; + ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size; kfree(aead_req); return rc; @@ -238,7 +232,7 @@ static int tls_push_record(struct sock *sk, int flags, sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1); tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size, - tls_ctx->rec_seq, tls_ctx->rec_seq_size, + tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size, record_type); tls_fill_prepend(tls_ctx, @@ -269,9 +263,9 @@ static int tls_push_record(struct sock *sk, int flags, /* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */ rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags); if (rc < 0 && rc != -EAGAIN) - tls_err_abort(sk); + tls_err_abort(sk, EBADMSG); - tls_advance_record_sn(sk, tls_ctx); + tls_advance_record_sn(sk, &tls_ctx->tx); return rc; } @@ -281,23 +275,24 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags) } static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from, - int length) + int length, int *pages_used, + unsigned int *size_used, + struct scatterlist *to, int to_max_pages, + bool charge) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); struct page *pages[MAX_SKB_FRAGS]; size_t offset; ssize_t copied, use; int i = 0; - unsigned int size = ctx->sg_plaintext_size; - int num_elem = ctx->sg_plaintext_num_elem; + unsigned int size = *size_used; + int num_elem = *pages_used; int rc = 0; int maxpages; while (length > 0) { i = 0; - maxpages = ARRAY_SIZE(ctx->sg_plaintext_data) - num_elem; + maxpages = to_max_pages - num_elem; if (maxpages == 0) { rc = -EFAULT; goto out; @@ -317,10 +312,11 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from, while (copied) { use = min_t(int, copied, PAGE_SIZE - offset); - sg_set_page(&ctx->sg_plaintext_data[num_elem], + sg_set_page(&to[num_elem], pages[i], use, offset); - sg_unmark_end(&ctx->sg_plaintext_data[num_elem]); - sk_mem_charge(sk, use); + sg_unmark_end(&to[num_elem]); + if (charge) + sk_mem_charge(sk, use); offset = 0; copied -= use; @@ -331,8 +327,9 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from, } out: - ctx->sg_plaintext_size = size; - ctx->sg_plaintext_num_elem = num_elem; + *size_used = size; + *pages_used = num_elem; + return rc; } @@ -409,7 +406,7 @@ int tls_sw_sendmsg(struct sock *sk, struct 
msghdr *msg, size_t size) } required_size = ctx->sg_plaintext_size + try_to_copy + - tls_ctx->overhead_size; + tls_ctx->tx.overhead_size; if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -429,7 +426,11 @@ alloc_encrypted: if (full_record || eor) { ret = zerocopy_from_iter(sk, &msg->msg_iter, - try_to_copy); + try_to_copy, &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size, + ctx->sg_plaintext_data, + ARRAY_SIZE(ctx->sg_plaintext_data), + true); if (ret) goto fallback_to_reg_send; @@ -468,7 +469,7 @@ alloc_plaintext: &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, ctx->sg_plaintext_size + - tls_ctx->overhead_size); + tls_ctx->tx.overhead_size); } ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy); @@ -560,7 +561,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, full_record = true; } required_size = ctx->sg_plaintext_size + copy + - tls_ctx->overhead_size; + tls_ctx->tx.overhead_size; if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -629,13 +630,404 @@ sendpage_end: return ret; } -void tls_sw_free_tx_resources(struct sock *sk) +static struct sk_buff *tls_wait_data(struct sock *sk, int flags, + long timeo, int *err) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct sk_buff *skb; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + while (!(skb = ctx->recv_pkt)) { + if (sk->sk_err) { + *err = sock_error(sk); + return NULL; + } + + if (sock_flag(sk, SOCK_DONE)) + return NULL; + + if ((flags & MSG_DONTWAIT) || !timeo) { + *err = -EAGAIN; + return NULL; + } + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + sk_wait_event(sk, &timeo, ctx->recv_pkt != skb, &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + + /* Handle signals */ + if (signal_pending(current)) { + *err = sock_intr_errno(timeo); + return NULL; + } + } + + return skb; +} + +static int decrypt_skb(struct sock *sk, struct sk_buff *skb, + struct scatterlist *sgout) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + tls_ctx->rx.iv_size]; + struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2]; + struct scatterlist *sgin = &sgin_arr[0]; + struct strp_msg *rxm = strp_msg(skb); + int ret, nsg = ARRAY_SIZE(sgin_arr); + char aad_recv[TLS_AAD_SPACE_SIZE]; + struct sk_buff *unused; + + ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, + iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + tls_ctx->rx.iv_size); + if (ret < 0) + return ret; + + memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + if (!sgout) { + nsg = skb_cow_data(skb, 0, &unused) + 1; + sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation); + if (!sgout) + sgout = sgin; + } + + sg_init_table(sgin, nsg); + sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv)); + + nsg = skb_to_sgvec(skb, &sgin[1], + rxm->offset + tls_ctx->rx.prepend_size, + rxm->full_len - tls_ctx->rx.prepend_size); + + tls_make_aad(aad_recv, + rxm->full_len - tls_ctx->rx.overhead_size, + tls_ctx->rx.rec_seq, + tls_ctx->rx.rec_seq_size, + ctx->control); + + ret = tls_do_decryption(sk, sgin, sgout, iv, + rxm->full_len - tls_ctx->rx.overhead_size, + skb, sk->sk_allocation); + + if (sgin != &sgin_arr[0]) + kfree(sgin); + + return ret; +} + +static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, + unsigned int len) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct 
strp_msg *rxm = strp_msg(skb); + + if (len < rxm->full_len) { + rxm->offset += len; + rxm->full_len -= len; + + return false; + } + + /* Finished with message */ + ctx->recv_pkt = NULL; + kfree_skb(skb); + strp_unpause(&ctx->strp); + + return true; +} + +int tls_sw_recvmsg(struct sock *sk, + struct msghdr *msg, + size_t len, + int nonblock, + int flags, + int *addr_len) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + unsigned char control; + struct strp_msg *rxm; + struct sk_buff *skb; + ssize_t copied = 0; + bool cmsg = false; + int err = 0; + long timeo; + + flags |= nonblock; + + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); + + lock_sock(sk); + + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + do { + bool zc = false; + int chunk = 0; + + skb = tls_wait_data(sk, flags, timeo, &err); + if (!skb) + goto recv_end; + + rxm = strp_msg(skb); + if (!cmsg) { + int cerr; + + cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, + sizeof(ctx->control), &ctx->control); + cmsg = true; + control = ctx->control; + if (ctx->control != TLS_RECORD_TYPE_DATA) { + if (cerr || msg->msg_flags & MSG_CTRUNC) { + err = -EIO; + goto recv_end; + } + } + } else if (control != ctx->control) { + goto recv_end; + } + + if (!ctx->decrypted) { + int page_count; + int to_copy; + + page_count = iov_iter_npages(&msg->msg_iter, + MAX_SKB_FRAGS); + to_copy = rxm->full_len - tls_ctx->rx.overhead_size; + if (to_copy <= len && page_count < MAX_SKB_FRAGS && + likely(!(flags & MSG_PEEK))) { + struct scatterlist sgin[MAX_SKB_FRAGS + 1]; + char unused[21]; + int pages = 0; + + zc = true; + sg_init_table(sgin, MAX_SKB_FRAGS + 1); + sg_set_buf(&sgin[0], unused, 13); + + err = zerocopy_from_iter(sk, &msg->msg_iter, + to_copy, &pages, + &chunk, &sgin[1], + MAX_SKB_FRAGS, false); + if (err < 0) + goto fallback_to_reg_recv; + + err = decrypt_skb(sk, skb, sgin); + for (; pages > 0; pages--) + put_page(sg_page(&sgin[pages])); + if (err < 0) { + tls_err_abort(sk, EBADMSG); + goto recv_end; + } + } else { +fallback_to_reg_recv: + err = decrypt_skb(sk, skb, NULL); + if (err < 0) { + tls_err_abort(sk, EBADMSG); + goto recv_end; + } + } + ctx->decrypted = true; + } + + if (!zc) { + chunk = min_t(unsigned int, rxm->full_len, len); + err = skb_copy_datagram_msg(skb, rxm->offset, msg, + chunk); + if (err < 0) + goto recv_end; + } + + copied += chunk; + len -= chunk; + if (likely(!(flags & MSG_PEEK))) { + u8 control = ctx->control; + + if (tls_sw_advance_skb(sk, skb, chunk)) { + /* Return full control message to + * userspace before trying to parse + * another message type + */ + msg->msg_flags |= MSG_EOR; + if (control != TLS_RECORD_TYPE_DATA) + goto recv_end; + } + } + } while (len); + +recv_end: + release_sock(sk); + return copied ? 
: err; +} + +ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) +{ + struct tls_context *tls_ctx = tls_get_ctx(sock->sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct strp_msg *rxm = NULL; + struct sock *sk = sock->sk; + struct sk_buff *skb; + ssize_t copied = 0; + int err = 0; + long timeo; + int chunk; + + lock_sock(sk); + + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + skb = tls_wait_data(sk, flags, timeo, &err); + if (!skb) + goto splice_read_end; + + /* splice does not support reading control messages */ + if (ctx->control != TLS_RECORD_TYPE_DATA) { + err = -ENOTSUPP; + goto splice_read_end; + } + + if (!ctx->decrypted) { + err = decrypt_skb(sk, skb, NULL); + + if (err < 0) { + tls_err_abort(sk, EBADMSG); + goto splice_read_end; + } + ctx->decrypted = true; + } + rxm = strp_msg(skb); + + chunk = min_t(unsigned int, rxm->full_len, len); + copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags); + if (copied < 0) + goto splice_read_end; + + if (likely(!(flags & MSG_PEEK))) + tls_sw_advance_skb(sk, skb, copied); + +splice_read_end: + release_sock(sk); + return copied ? : err; +} + +unsigned int tls_sw_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) +{ + unsigned int ret; + struct sock *sk = sock->sk; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + + /* Grab POLLOUT and POLLHUP from the underlying socket */ + ret = ctx->sk_poll(file, sock, wait); + + /* Clear POLLIN bits, and set based on recv_pkt */ + ret &= ~(POLLIN | POLLRDNORM); + if (ctx->recv_pkt) + ret |= POLLIN | POLLRDNORM; + + return ret; +} + +static int tls_read_size(struct strparser *strp, struct sk_buff *skb) +{ + struct tls_context *tls_ctx = tls_get_ctx(strp->sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + char header[tls_ctx->rx.prepend_size]; + struct strp_msg *rxm = strp_msg(skb); + size_t cipher_overhead; + size_t data_len = 0; + int ret; + + /* Verify that we have a full TLS header, or wait for more data */ + if (rxm->offset + tls_ctx->rx.prepend_size > skb->len) + return 0; + + /* Linearize header to local buffer */ + ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size); + + if (ret < 0) + goto read_failure; + + ctx->control = header[0]; + + data_len = ((header[4] & 0xFF) | (header[3] << 8)); + + cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size; + + if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) { + ret = -EMSGSIZE; + goto read_failure; + } + if (data_len < cipher_overhead) { + ret = -EBADMSG; + goto read_failure; + } + + if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.version) || + header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.version)) { + ret = -EINVAL; + goto read_failure; + } + + return data_len + TLS_HEADER_SIZE; + +read_failure: + tls_err_abort(strp->sk, ret); + + return ret; +} + +static void tls_queue(struct strparser *strp, struct sk_buff *skb) +{ + struct tls_context *tls_ctx = tls_get_ctx(strp->sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + struct strp_msg *rxm; + + rxm = strp_msg(skb); + + ctx->decrypted = false; + + ctx->recv_pkt = skb; + strp_pause(strp); + + strp->sk->sk_state_change(strp->sk); +} + +static void tls_data_ready(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); + + strp_data_ready(&ctx->strp); +} + +void tls_sw_free_resources(struct sock *sk) { 
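	/* Tears down the RX state set up by tls_set_sw_offload(): drops any
	 * record the stream parser is still holding, frees the receive aead,
	 * stops the strparser and restores the saved sk_data_ready callback.
	 * The socket lock is released around strp_done() so pending strparser
	 * work can complete.
	 */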
struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); if (ctx->aead_send) crypto_free_aead(ctx->aead_send); + if (ctx->aead_recv) { + if (ctx->recv_pkt) { + kfree_skb(ctx->recv_pkt); + ctx->recv_pkt = NULL; + } + crypto_free_aead(ctx->aead_recv); + strp_stop(&ctx->strp); + write_lock_bh(&sk->sk_callback_lock); + sk->sk_data_ready = ctx->saved_data_ready; + write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); + strp_done(&ctx->strp); + lock_sock(sk); + } tls_free_both_sg(sk); @@ -643,12 +1035,15 @@ void tls_sw_free_tx_resources(struct sock *sk) kfree(tls_ctx); } -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) +int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; struct tls_crypto_info *crypto_info; struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; struct tls_sw_context *sw_ctx; + struct cipher_context *cctx; + struct crypto_aead **aead; + struct strp_callbacks cb; u16 nonce_size, tag_size, iv_size, rec_seq_size; char *iv, *rec_seq; int rc = 0; @@ -658,22 +1053,29 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) goto out; } - if (ctx->priv_ctx) { - rc = -EEXIST; - goto out; - } - - sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL); - if (!sw_ctx) { - rc = -ENOMEM; - goto out; + if (!ctx->priv_ctx) { + sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL); + if (!sw_ctx) { + rc = -ENOMEM; + goto out; + } + crypto_init_wait(&sw_ctx->async_wait); + } else { + sw_ctx = ctx->priv_ctx; } - crypto_init_wait(&sw_ctx->async_wait); - ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; - crypto_info = &ctx->crypto_send; + if (tx) { + crypto_info = &ctx->crypto_send; + cctx = &ctx->tx; + aead = &sw_ctx->aead_send; + } else { + crypto_info = &ctx->crypto_recv; + cctx = &ctx->rx; + aead = &sw_ctx->aead_recv; + } + switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: { nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; @@ -692,46 +1094,49 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) goto free_priv; } - ctx->prepend_size = TLS_HEADER_SIZE + nonce_size; - ctx->tag_size = tag_size; - ctx->overhead_size = ctx->prepend_size + ctx->tag_size; - ctx->iv_size = iv_size; - ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL); - if (!ctx->iv) { + cctx->prepend_size = TLS_HEADER_SIZE + nonce_size; + cctx->tag_size = tag_size; + cctx->overhead_size = cctx->prepend_size + cctx->tag_size; + cctx->iv_size = iv_size; + cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + GFP_KERNEL); + if (!cctx->iv) { rc = -ENOMEM; goto free_priv; } - memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); - memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); - ctx->rec_seq_size = rec_seq_size; - ctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL); - if (!ctx->rec_seq) { + memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); + cctx->rec_seq_size = rec_seq_size; + cctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL); + if (!cctx->rec_seq) { rc = -ENOMEM; goto free_iv; } - memcpy(ctx->rec_seq, rec_seq, rec_seq_size); - - sg_init_table(sw_ctx->sg_encrypted_data, - ARRAY_SIZE(sw_ctx->sg_encrypted_data)); - sg_init_table(sw_ctx->sg_plaintext_data, - ARRAY_SIZE(sw_ctx->sg_plaintext_data)); - - sg_init_table(sw_ctx->sg_aead_in, 2); - sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space, - sizeof(sw_ctx->aad_space)); - 
sg_unmark_end(&sw_ctx->sg_aead_in[1]); - sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data); - sg_init_table(sw_ctx->sg_aead_out, 2); - sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space, - sizeof(sw_ctx->aad_space)); - sg_unmark_end(&sw_ctx->sg_aead_out[1]); - sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data); - - if (!sw_ctx->aead_send) { - sw_ctx->aead_send = crypto_alloc_aead("gcm(aes)", 0, 0); - if (IS_ERR(sw_ctx->aead_send)) { - rc = PTR_ERR(sw_ctx->aead_send); - sw_ctx->aead_send = NULL; + memcpy(cctx->rec_seq, rec_seq, rec_seq_size); + + if (tx) { + sg_init_table(sw_ctx->sg_encrypted_data, + ARRAY_SIZE(sw_ctx->sg_encrypted_data)); + sg_init_table(sw_ctx->sg_plaintext_data, + ARRAY_SIZE(sw_ctx->sg_plaintext_data)); + + sg_init_table(sw_ctx->sg_aead_in, 2); + sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space, + sizeof(sw_ctx->aad_space)); + sg_unmark_end(&sw_ctx->sg_aead_in[1]); + sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data); + sg_init_table(sw_ctx->sg_aead_out, 2); + sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space, + sizeof(sw_ctx->aad_space)); + sg_unmark_end(&sw_ctx->sg_aead_out[1]); + sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data); + } + + if (!*aead) { + *aead = crypto_alloc_aead("gcm(aes)", 0, 0); + if (IS_ERR(*aead)) { + rc = PTR_ERR(*aead); + *aead = NULL; goto free_rec_seq; } } @@ -740,24 +1145,44 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) memcpy(keyval, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); - rc = crypto_aead_setkey(sw_ctx->aead_send, keyval, + rc = crypto_aead_setkey(*aead, keyval, TLS_CIPHER_AES_GCM_128_KEY_SIZE); if (rc) goto free_aead; - rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size); - if (!rc) - return 0; + rc = crypto_aead_setauthsize(*aead, cctx->tag_size); + if (rc) + goto free_aead; + + if (!tx) { + /* Set up strparser */ + memset(&cb, 0, sizeof(cb)); + cb.rcv_msg = tls_queue; + cb.parse_msg = tls_read_size; + + strp_init(&sw_ctx->strp, sk, &cb); + + write_lock_bh(&sk->sk_callback_lock); + sw_ctx->saved_data_ready = sk->sk_data_ready; + sk->sk_data_ready = tls_data_ready; + write_unlock_bh(&sk->sk_callback_lock); + + sw_ctx->sk_poll = sk->sk_socket->ops->poll; + + strp_check_rcv(&sw_ctx->strp); + } + + goto out; free_aead: - crypto_free_aead(sw_ctx->aead_send); - sw_ctx->aead_send = NULL; + crypto_free_aead(*aead); + *aead = NULL; free_rec_seq: - kfree(ctx->rec_seq); - ctx->rec_seq = NULL; + kfree(cctx->rec_seq); + cctx->rec_seq = NULL; free_iv: - kfree(ctx->iv); - ctx->iv = NULL; + kfree(ctx->tx.iv); + ctx->tx.iv = NULL; free_priv: kfree(ctx->priv_ctx); ctx->priv_ctx = NULL; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index bc2970a8e7f3..aded82da1aea 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2913,7 +2913,6 @@ static void __net_exit unix_net_exit(struct net *net) static struct pernet_operations unix_net_ops = { .init = unix_net_init, .exit = unix_net_exit, - .async = true, }; static int __init af_unix_init(void) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 1abcc4fc4df1..41722046b937 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -34,9 +34,10 @@ config CFG80211 When built as a module it will be called cfg80211. +if CFG80211 + config NL80211_TESTMODE bool "nl80211 testmode command" - depends on CFG80211 help The nl80211 testmode command helps implementing things like factory calibration or validation tools for wireless chips. 
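
Stepping back from the diff: the TLS_RX path added above is driven from user
space exactly like the existing TLS_TX path. A minimal sketch of the intended
usage (illustration only, not code from this series; key material and error
handling are elided, and the fallback defines cover headers that predate
TCP_ULP/SOL_TLS):

#include <sys/socket.h>
#include <netinet/tcp.h>
#include <linux/tls.h>

#ifndef TCP_ULP
#define TCP_ULP 31
#endif
#ifndef SOL_TLS
#define SOL_TLS 282
#endif

/* tx/rx carry the key, salt, iv and rec_seq from a user-space handshake */
static int enable_ktls(int fd,
		       const struct tls12_crypto_info_aes_gcm_128 *tx,
		       const struct tls12_crypto_info_aes_gcm_128 *rx)
{
	/* attach the "tls" ULP; tls_init() leaves ctx->conf at TLS_BASE */
	if (setsockopt(fd, SOL_TCP, TCP_ULP, "tls", sizeof("tls")))
		return -1;
	/* install send-direction keys: conf becomes TLS_SW_TX */
	if (setsockopt(fd, SOL_TLS, TLS_TX, tx, sizeof(*tx)))
		return -1;
	/* install receive-direction keys: conf becomes TLS_SW_RXTX */
	return setsockopt(fd, SOL_TLS, TLS_RX, rx, sizeof(*rx));
}
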
@@ -51,7 +52,6 @@ config NL80211_TESTMODE config CFG80211_DEVELOPER_WARNINGS bool "enable developer warnings" - depends on CFG80211 default n help This option enables some additional warnings that help @@ -68,7 +68,7 @@ config CFG80211_DEVELOPER_WARNINGS config CFG80211_CERTIFICATION_ONUS bool "cfg80211 certification onus" - depends on CFG80211 && EXPERT + depends on EXPERT default n ---help--- You should disable this option unless you are both capable @@ -159,7 +159,6 @@ config CFG80211_REG_RELAX_NO_IR config CFG80211_DEFAULT_PS bool "enable powersave by default" - depends on CFG80211 default y help This option enables powersave mode by default. @@ -170,7 +169,6 @@ config CFG80211_DEFAULT_PS config CFG80211_DEBUGFS bool "cfg80211 DebugFS entries" - depends on CFG80211 depends on DEBUG_FS ---help--- You can enable this if you want debugfs entries for cfg80211. @@ -180,7 +178,6 @@ config CFG80211_DEBUGFS config CFG80211_CRDA_SUPPORT bool "support CRDA" if EXPERT default y - depends on CFG80211 help You should enable this option unless you know for sure you have no need for it, for example when using internal regdb (above) or the @@ -190,7 +187,6 @@ config CFG80211_CRDA_SUPPORT config CFG80211_WEXT bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT - depends on CFG80211 select WEXT_CORE default y if CFG80211_WEXT_EXPORT help @@ -199,11 +195,12 @@ config CFG80211_WEXT config CFG80211_WEXT_EXPORT bool - depends on CFG80211 help Drivers should select this option if they require cfg80211's wext compatibility symbols to be exported. +endif # CFG80211 + config LIB80211 tristate default n diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 63682176c96c..882d97bdc6bf 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -27,6 +27,7 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, err = rdev_stop_ap(rdev, dev); if (!err) { + wdev->conn_owner_nlportid = 0; wdev->beacon_interval = 0; memset(&wdev->chandef, 0, sizeof(wdev->chandef)); wdev->ssid_len = 0; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a48859982a32..2db713d18f71 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -579,6 +579,10 @@ static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy, { struct ieee80211_channel *c; u32 freq, start_freq, end_freq; + bool dfs_offload; + + dfs_offload = wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_DFS_OFFLOAD); start_freq = cfg80211_get_start_freq(center_freq, bandwidth); end_freq = cfg80211_get_end_freq(center_freq, bandwidth); @@ -596,8 +600,9 @@ static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy, if (c->flags & IEEE80211_CHAN_DISABLED) return false; - if ((c->flags & IEEE80211_CHAN_RADAR) && - (c->dfs_state != NL80211_DFS_AVAILABLE)) + if ((c->flags & IEEE80211_CHAN_RADAR) && + (c->dfs_state != NL80211_DFS_AVAILABLE) && + !(c->dfs_state == NL80211_DFS_USABLE && dfs_offload)) return false; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 670aa229168a..a6f3cac8c640 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1340,7 +1340,6 @@ static void __net_exit cfg80211_pernet_exit(struct net *net) static struct pernet_operations cfg80211_pernet_ops = { .exit = cfg80211_pernet_exit, - .async = true, }; static int __init cfg80211_init(void) diff --git a/net/wireless/core.h b/net/wireless/core.h index eaff636169c2..63eb1b5fdd04 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -282,10 +282,10 @@ void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long 
age_secs); /* IBSS */ -int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_ibss_params *params, - struct cfg80211_cached_keys *connkeys); +int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); @@ -303,10 +303,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); -int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct mesh_setup *setup, - const struct mesh_config *conf); int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index a1d10993d08a..d1743e6abc34 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -84,14 +84,15 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, } EXPORT_SYMBOL(cfg80211_ibss_joined); -static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_ibss_params *params, - struct cfg80211_cached_keys *connkeys) +int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; + ASSERT_RTNL(); ASSERT_WDEV_LOCK(wdev); if (wdev->ssid_len) @@ -146,23 +147,6 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, return 0; } -int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_ibss_params *params, - struct cfg80211_cached_keys *connkeys) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - ASSERT_RTNL(); - - wdev_lock(wdev); - err = __cfg80211_join_ibss(rdev, dev, params, connkeys); - wdev_unlock(wdev); - - return err; -} - static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -224,6 +208,7 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, if (err) return err; + wdev->conn_owner_nlportid = 0; __cfg80211_clear_ibss(dev, nowext); return 0; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index b12da6ef3c12..eac5aa1419fc 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -217,21 +217,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, return err; } -int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct mesh_setup *setup, - const struct mesh_config *conf) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_join_mesh(rdev, dev, setup, conf); - wdev_unlock(wdev); - - return err; -} - int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_chan_def *chandef) @@ -286,6 +271,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, err = rdev_leave_mesh(rdev, dev); if (!err) { + wdev->conn_owner_nlportid = 0; wdev->mesh_id_len = 0; wdev->beacon_interval = 0; memset(&wdev->chandef, 0, sizeof(wdev->chandef)); diff --git 
a/net/wireless/mlme.c b/net/wireless/mlme.c index bbb9907bfa86..12b3edf70a7b 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -872,7 +872,7 @@ void cfg80211_cac_event(struct net_device *netdev, trace_cfg80211_cac_event(netdev, event); - if (WARN_ON(!wdev->cac_started)) + if (WARN_ON(!wdev->cac_started && event != NL80211_RADAR_CAC_STARTED)) return; if (WARN_ON(!wdev->chandef.chan)) @@ -888,14 +888,17 @@ void cfg80211_cac_event(struct net_device *netdev, sizeof(struct cfg80211_chan_def)); queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk); cfg80211_sched_dfs_chan_update(rdev); - break; + /* fall through */ case NL80211_RADAR_CAC_ABORTED: + wdev->cac_started = false; + break; + case NL80211_RADAR_CAC_STARTED: + wdev->cac_started = true; break; default: WARN_ON(1); return; } - wdev->cac_started = false; nl80211_radar_notify(rdev, chandef, event, netdev, gfp); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a910150f8169..ff28f8feeb09 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -287,6 +287,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, + [NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, @@ -4134,6 +4135,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->chandef = params.chandef; wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); + + if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) + wdev->conn_owner_nlportid = info->snd_portid; } wdev_unlock(wdev); @@ -7551,12 +7555,13 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_chan_def chandef; enum nl80211_dfs_regions dfs_region; unsigned int cac_time_ms; int err; - dfs_region = reg_get_dfs_region(wdev->wiphy); + dfs_region = reg_get_dfs_region(wiphy); if (dfs_region == NL80211_DFS_UNSET) return -EINVAL; @@ -7570,17 +7575,20 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (wdev->cac_started) return -EBUSY; - err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef, - wdev->iftype); + err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) return err; if (err == 0) return -EINVAL; - if (!cfg80211_chandef_dfs_usable(wdev->wiphy, &chandef)) + if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) return -EINVAL; + /* CAC start is offloaded to HW and can't be started manually */ + if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) + return -EOPNOTSUPP; + if (!rdev->ops->start_radar_detection) return -EOPNOTSUPP; @@ -8204,6 +8212,22 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) return err; } +static int validate_pae_over_nl80211(struct cfg80211_registered_device *rdev, + struct genl_info *info) +{ + if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) { + GENL_SET_ERR_MSG(info, "SOCKET_OWNER not set"); + return -EINVAL; + } + + if (!rdev->ops->tx_control_port || + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211)) + return -EOPNOTSUPP; 
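+	/* Both prerequisites hold: the requesting socket owns the connection
+	 * (SOCKET_OWNER) and the driver can carry EAPOL frames over nl80211
+	 * (tx_control_port op plus the CONTROL_PORT_OVER_NL80211 feature).
+	 */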
+ + return 0; +} + static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, struct genl_info *info, struct cfg80211_crypto_settings *settings, @@ -8227,6 +8251,15 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, } else settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE); + if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) { + int r = validate_pae_over_nl80211(rdev, info); + + if (r < 0) + return r; + + settings->control_port_over_nl80211 = true; + } + if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { void *data; int len, i; @@ -8672,12 +8705,26 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.control_port = nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT]); + if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) { + int r = validate_pae_over_nl80211(rdev, info); + + if (r < 0) + return r; + + ibss.control_port_over_nl80211 = true; + } + ibss.userspace_handles_dfs = nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]); - err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); + wdev_lock(dev->ieee80211_ptr); + err = __cfg80211_join_ibss(rdev, dev, &ibss, connkeys); if (err) kzfree(connkeys); + else if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) + dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; + wdev_unlock(dev->ieee80211_ptr); + return err; } @@ -10083,7 +10130,7 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) if (err) return err; } else { - /* cfg80211_join_mesh() will sort it out */ + /* __cfg80211_join_mesh() will sort it out */ setup.chandef.chan = NULL; } @@ -10121,7 +10168,22 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.userspace_handles_dfs = nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]); - return cfg80211_join_mesh(rdev, dev, &setup, &cfg); + if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) { + int r = validate_pae_over_nl80211(rdev, info); + + if (r < 0) + return r; + + setup.control_port_over_nl80211 = true; + } + + wdev_lock(dev->ieee80211_ptr); + err = __cfg80211_join_mesh(rdev, dev, &setup, &cfg); + if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) + dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; + wdev_unlock(dev->ieee80211_ptr); + + return err; } static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) @@ -12517,6 +12579,68 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info) return rdev_external_auth(rdev, dev, &params); } +static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const u8 *buf; + size_t len; + u8 *dest; + u16 proto; + bool noencrypt; + int err; + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211)) + return -EOPNOTSUPP; + + if (!rdev->ops->tx_control_port) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_FRAME] || + !info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) { + GENL_SET_ERR_MSG(info, "Frame, MAC or ethertype missing"); + return -EINVAL; + } + + wdev_lock(wdev); + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: + break; + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + if (wdev->current_bss) + break; + err = -ENOTCONN; + goto
out; + default: + err = -EOPNOTSUPP; + goto out; + } + + wdev_unlock(wdev); + + buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); + len = nla_len(info->attrs[NL80211_ATTR_FRAME]); + dest = nla_data(info->attrs[NL80211_ATTR_MAC]); + proto = nla_get_u16(info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]); + noencrypt = + nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]); + + return rdev_tx_control_port(rdev, dev, buf, len, + dest, cpu_to_be16(proto), noencrypt); + + out: + wdev_unlock(wdev); + return err; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -13420,7 +13544,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, - + { + .cmd = NL80211_CMD_CONTROL_PORT_FRAME, + .doit = nl80211_tx_control_port, + .policy = nl80211_policy, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -14535,6 +14666,64 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, } EXPORT_SYMBOL(cfg80211_mgmt_tx_status); +static int __nl80211_rx_control_port(struct net_device *dev, + const u8 *buf, size_t len, + const u8 *addr, u16 proto, + bool unencrypted, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + u32 nlportid = READ_ONCE(wdev->conn_owner_nlportid); + + if (!nlportid) + return -ENOENT; + + msg = nlmsg_new(100 + len, gfp); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONTROL_PORT_FRAME); + if (!hdr) { + nlmsg_free(msg); + return -ENOBUFS; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || + nla_put(msg, NL80211_ATTR_FRAME, len, buf) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || + nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) || + (unencrypted && nla_put_flag(msg, + NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); + + nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} + +bool cfg80211_rx_control_port(struct net_device *dev, + const u8 *buf, size_t len, + const u8 *addr, u16 proto, bool unencrypted) +{ + int ret; + + trace_cfg80211_rx_control_port(dev, buf, len, addr, proto, unencrypted); + ret = __nl80211_rx_control_port(dev, buf, len, addr, proto, + unencrypted, GFP_ATOMIC); + trace_cfg80211_return_bool(ret == 0); + return ret == 0; +} +EXPORT_SYMBOL(cfg80211_rx_control_port); + static struct sk_buff *cfg80211_prepare_cqm(struct net_device *dev, const char *mac, gfp_t gfp) { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 84f23ae015fc..87479a53411b 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -714,6 +714,21 @@ static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const void *buf, size_t len, + const u8 *dest, __be16 proto, + const bool noencrypt) +{ + int ret; + trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len, + dest, proto, noencrypt); + ret = 
rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len, + dest, proto, noencrypt); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_mgmt_tx_cancel_wait(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7b42f0bacfd8..16c7e4ef5820 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -5,6 +5,7 @@ * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -134,12 +135,12 @@ static void restore_regulatory_settings(bool reset_user); static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { - return rtnl_dereference(cfg80211_regdomain); + return rcu_dereference_rtnl(cfg80211_regdomain); } const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { - return rtnl_dereference(wiphy->regd); + return rcu_dereference_rtnl(wiphy->regd); } static const char *reg_dfs_region_str(enum nl80211_dfs_regions dfs_region) @@ -424,23 +425,36 @@ static const struct ieee80211_regdomain * reg_copy_regd(const struct ieee80211_regdomain *src_regd) { struct ieee80211_regdomain *regd; - int size_of_regd; + int size_of_regd, size_of_wmms; unsigned int i; + struct ieee80211_wmm_rule *d_wmm, *s_wmm; size_of_regd = sizeof(struct ieee80211_regdomain) + src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule); + size_of_wmms = src_regd->n_wmm_rules * + sizeof(struct ieee80211_wmm_rule); - regd = kzalloc(size_of_regd, GFP_KERNEL); + regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL); if (!regd) return ERR_PTR(-ENOMEM); memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain)); - for (i = 0; i < src_regd->n_reg_rules; i++) + d_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd); + s_wmm = (struct ieee80211_wmm_rule *)((u8 *)src_regd + size_of_regd); + memcpy(d_wmm, s_wmm, size_of_wmms); + + for (i = 0; i < src_regd->n_reg_rules; i++) { memcpy(&regd->reg_rules[i], &src_regd->reg_rules[i], sizeof(struct ieee80211_reg_rule)); + if (!src_regd->reg_rules[i].wmm_rule) + continue; + regd->reg_rules[i].wmm_rule = d_wmm + + (src_regd->reg_rules[i].wmm_rule - s_wmm) / + sizeof(struct ieee80211_wmm_rule); + } return regd; } @@ -595,6 +609,17 @@ enum fwdb_flags { FWDB_FLAG_AUTO_BW = BIT(4), }; +struct fwdb_wmm_ac { + u8 ecw; + u8 aifsn; + __be16 cot; +} __packed; + +struct fwdb_wmm_rule { + struct fwdb_wmm_ac client[IEEE80211_NUM_ACS]; + struct fwdb_wmm_ac ap[IEEE80211_NUM_ACS]; +} __packed; + struct fwdb_rule { u8 len; u8 flags; @@ -602,6 +627,7 @@ struct fwdb_rule { __be32 start, end, max_bw; /* start of optional data */ __be16 cac_timeout; + __be16 wmm_ptr; } __packed __aligned(4); #define FWDB_MAGIC 0x52474442 @@ -613,6 +639,31 @@ struct fwdb_header { struct fwdb_country country[]; } __packed __aligned(4); +static int ecw2cw(int ecw) +{ + return (1 << ecw) - 1; +} + +static bool valid_wmm(struct fwdb_wmm_rule *rule) +{ + struct fwdb_wmm_ac *ac = (struct fwdb_wmm_ac *)rule; + int i; + + for (i = 0; i < IEEE80211_NUM_ACS * 2; i++) { + u16 cw_min = ecw2cw((ac[i].ecw & 0xf0) >> 4); + u16 cw_max = ecw2cw(ac[i].ecw & 0x0f); + u8 aifsn = ac[i].aifsn; + + if (cw_min >= cw_max) + return false; + + if (aifsn < 1) + return false; + } + + return true; +} + static bool valid_rule(const u8
*data, unsigned int size, u16 rule_ptr) { struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2)); @@ -623,7 +674,18 @@ static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr) /* mandatory fields */ if (rule->len < offsetofend(struct fwdb_rule, max_bw)) return false; + if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) { + u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2; + struct fwdb_wmm_rule *wmm; + + if (wmm_ptr + sizeof(struct fwdb_wmm_rule) > size) + return false; + wmm = (void *)(data + wmm_ptr); + + if (!valid_wmm(wmm)) + return false; + } return true; } @@ -798,23 +860,118 @@ static bool valid_regdb(const u8 *data, unsigned int size) return true; } +static void set_wmm_rule(struct ieee80211_wmm_rule *rule, + struct fwdb_wmm_rule *wmm) +{ + unsigned int i; + + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + rule->client[i].cw_min = + ecw2cw((wmm->client[i].ecw & 0xf0) >> 4); + rule->client[i].cw_max = ecw2cw(wmm->client[i].ecw & 0x0f); + rule->client[i].aifsn = wmm->client[i].aifsn; + rule->client[i].cot = 1000 * be16_to_cpu(wmm->client[i].cot); + rule->ap[i].cw_min = ecw2cw((wmm->ap[i].ecw & 0xf0) >> 4); + rule->ap[i].cw_max = ecw2cw(wmm->ap[i].ecw & 0x0f); + rule->ap[i].aifsn = wmm->ap[i].aifsn; + rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot); + } +} + +static int __regdb_query_wmm(const struct fwdb_header *db, + const struct fwdb_country *country, int freq, + u32 *dbptr, struct ieee80211_wmm_rule *rule) +{ + unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; + struct fwdb_collection *coll = (void *)((u8 *)db + ptr); + int i; + + for (i = 0; i < coll->n_rules; i++) { + __be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2)); + unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2; + struct fwdb_rule *rrule = (void *)((u8 *)db + rule_ptr); + struct fwdb_wmm_rule *wmm; + unsigned int wmm_ptr; + + if (rrule->len < offsetofend(struct fwdb_rule, wmm_ptr)) + continue; + + if (freq >= KHZ_TO_MHZ(be32_to_cpu(rrule->start)) && + freq <= KHZ_TO_MHZ(be32_to_cpu(rrule->end))) { + wmm_ptr = be16_to_cpu(rrule->wmm_ptr) << 2; + wmm = (void *)((u8 *)db + wmm_ptr); + set_wmm_rule(rule, wmm); + if (dbptr) + *dbptr = wmm_ptr; + return 0; + } + } + + return -ENODATA; +} + +int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr, + struct ieee80211_wmm_rule *rule) +{ + const struct fwdb_header *hdr = regdb; + const struct fwdb_country *country; + + if (IS_ERR(regdb)) + return PTR_ERR(regdb); + + country = &hdr->country[0]; + while (country->coll_ptr) { + if (alpha2_equal(alpha2, country->alpha2)) + return __regdb_query_wmm(regdb, country, freq, dbptr, + rule); + + country++; + } + + return -ENODATA; +} +EXPORT_SYMBOL(reg_query_regdb_wmm); + +struct wmm_ptrs { + struct ieee80211_wmm_rule *rule; + u32 ptr; +}; + +static struct ieee80211_wmm_rule *find_wmm_ptr(struct wmm_ptrs *wmm_ptrs, + u32 wmm_ptr, int n_wmms) +{ + int i; + + for (i = 0; i < n_wmms; i++) { + if (wmm_ptrs[i].ptr == wmm_ptr) + return wmm_ptrs[i].rule; + } + return NULL; +} + static int regdb_query_country(const struct fwdb_header *db, const struct fwdb_country *country) { unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; struct fwdb_collection *coll = (void *)((u8 *)db + ptr); struct ieee80211_regdomain *regdom; - unsigned int size_of_regd; - unsigned int i; + struct ieee80211_regdomain *tmp_rd; + unsigned int size_of_regd, i, n_wmms = 0; + struct wmm_ptrs *wmm_ptrs; - size_of_regd = - sizeof(struct ieee80211_regdomain) + + size_of_regd = sizeof(struct ieee80211_regdomain) + coll->n_rules * 
sizeof(struct ieee80211_reg_rule); regdom = kzalloc(size_of_regd, GFP_KERNEL); if (!regdom) return -ENOMEM; + wmm_ptrs = kcalloc(coll->n_rules, sizeof(*wmm_ptrs), GFP_KERNEL); + if (!wmm_ptrs) { + kfree(regdom); + return -ENOMEM; + } + regdom->n_reg_rules = coll->n_rules; regdom->alpha2[0] = country->alpha2[0]; regdom->alpha2[1] = country->alpha2[1]; @@ -851,7 +1008,38 @@ static int regdb_query_country(const struct fwdb_header *db, if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout)) rrule->dfs_cac_ms = 1000 * be16_to_cpu(rule->cac_timeout); + if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) { + u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2; + struct ieee80211_wmm_rule *wmm_pos = + find_wmm_ptr(wmm_ptrs, wmm_ptr, n_wmms); + struct fwdb_wmm_rule *wmm; + struct ieee80211_wmm_rule *wmm_rule; + + if (wmm_pos) { + rrule->wmm_rule = wmm_pos; + continue; + } + wmm = (void *)((u8 *)db + wmm_ptr); + tmp_rd = krealloc(regdom, size_of_regd + (n_wmms + 1) * + sizeof(struct ieee80211_wmm_rule), + GFP_KERNEL); + + if (!tmp_rd) { + kfree(regdom); + return -ENOMEM; + } + regdom = tmp_rd; + + wmm_rule = (struct ieee80211_wmm_rule *) + ((u8 *)regdom + size_of_regd + n_wmms * + sizeof(struct ieee80211_wmm_rule)); + + set_wmm_rule(wmm_rule, wmm); + wmm_ptrs[n_wmms].ptr = wmm_ptr; + wmm_ptrs[n_wmms++].rule = wmm_rule; + } } + kfree(wmm_ptrs); return reg_schedule_apply(regdom); } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 701cfd7acc1b..5df6b33db786 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1239,17 +1239,38 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) wdev_lock(wdev); if (wdev->conn_owner_nlportid) { - /* - * Use disconnect_bssid if still connecting and ops->disconnect - * not implemented. Otherwise we can use cfg80211_disconnect. - */ - if (rdev->ops->disconnect || wdev->current_bss) - cfg80211_disconnect(rdev, wdev->netdev, - WLAN_REASON_DEAUTH_LEAVING, true); - else - cfg80211_mlme_deauth(rdev, wdev->netdev, - wdev->disconnect_bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + cfg80211_leave_ibss(rdev, wdev->netdev, false); + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + cfg80211_stop_ap(rdev, wdev->netdev, false); + break; + case NL80211_IFTYPE_MESH_POINT: + cfg80211_leave_mesh(rdev, wdev->netdev); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + /* + * Use disconnect_bssid if still connecting and + * ops->disconnect not implemented. Otherwise we can + * use cfg80211_disconnect. 
+ */ + if (rdev->ops->disconnect || wdev->current_bss) + cfg80211_disconnect(rdev, wdev->netdev, + WLAN_REASON_DEAUTH_LEAVING, + true); + else + cfg80211_mlme_deauth(rdev, wdev->netdev, + wdev->disconnect_bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, + false); + break; + default: + break; + } } wdev_unlock(wdev); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5152938b358d..a64291ae52a6 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1882,6 +1882,32 @@ TRACE_EVENT(rdev_mgmt_tx, BOOL_TO_STR(__entry->dont_wait_for_ack)) ); +TRACE_EVENT(rdev_tx_control_port, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const u8 *buf, size_t len, const u8 *dest, __be16 proto, + bool unencrypted), + TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(dest) + __field(__be16, proto) + __field(bool, unencrypted) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(dest, dest); + __entry->proto = proto; + __entry->unencrypted = unencrypted; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT "," + " proto: 0x%x, unencrypted: %s", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest), + be16_to_cpu(__entry->proto), + BOOL_TO_STR(__entry->unencrypted)) +); + TRACE_EVENT(rdev_set_noack_map, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u16 noack_map), @@ -2600,6 +2626,27 @@ TRACE_EVENT(cfg80211_mgmt_tx_status, WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack)) ); +TRACE_EVENT(cfg80211_rx_control_port, + TP_PROTO(struct net_device *netdev, const u8 *buf, size_t len, + const u8 *addr, u16 proto, bool unencrypted), + TP_ARGS(netdev, buf, len, addr, proto, unencrypted), + TP_STRUCT__entry( + NETDEV_ENTRY + MAC_ENTRY(addr) + __field(u16, proto) + __field(bool, unencrypted) + ), + TP_fast_assign( + NETDEV_ASSIGN; + MAC_ASSIGN(addr, addr); + __entry->proto = proto; + __entry->unencrypted = unencrypted; + ), + TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT " proto: 0x%x, unencrypted: %s", + NETDEV_PR_ARG, MAC_PR_ARG(addr), + __entry->proto, BOOL_TO_STR(__entry->unencrypted)) +); + TRACE_EVENT(cfg80211_cqm_rssi_notify, TP_PROTO(struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index bc7064486b15..5e677dac2a0c 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -347,13 +347,13 @@ void wireless_nlevent_flush(void) struct sk_buff *skb; struct net *net; - ASSERT_RTNL(); - + down_read(&net_rwsem); for_each_net(net) { while ((skb = skb_dequeue(&net->wext_nlevents))) rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); } + up_read(&net_rwsem); } EXPORT_SYMBOL_GPL(wireless_nlevent_flush); @@ -390,7 +390,6 @@ static void __net_exit wext_pernet_exit(struct net *net) static struct pernet_operations wext_pernet_ops = { .init = wext_pernet_init, .exit = wext_pernet_exit, - .async = true, }; static int __init wireless_nlevent_init(void) @@ -411,9 +410,7 @@ subsys_initcall(wireless_nlevent_init); /* Process events generated by the wireless layer or the driver. 
*/ static void wireless_nlevent_process(struct work_struct *work) { - rtnl_lock(); wireless_nlevent_flush(); - rtnl_unlock(); } static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process); diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c index 5511f989ef47..b4c464594a5e 100644 --- a/net/wireless/wext-proc.c +++ b/net/wireless/wext-proc.c @@ -142,7 +142,7 @@ static const struct file_operations wireless_seq_fops = { int __net_init wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ - if (!proc_create("wireless", S_IRUGO, net->proc_net, + if (!proc_create("wireless", 0444, net->proc_net, &wireless_seq_fops)) return -ENOMEM; diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 0917f047f2cf..64b415e93f6a 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -212,16 +212,16 @@ int __init x25_proc_init(void) if (!proc_mkdir("x25", init_net.proc_net)) return -ENOMEM; - if (!proc_create("x25/route", S_IRUGO, init_net.proc_net, - &x25_seq_route_fops)) + if (!proc_create("x25/route", 0444, init_net.proc_net, + &x25_seq_route_fops)) goto out; - if (!proc_create("x25/socket", S_IRUGO, init_net.proc_net, - &x25_seq_socket_fops)) + if (!proc_create("x25/socket", 0444, init_net.proc_net, + &x25_seq_socket_fops)) goto out; - if (!proc_create("x25/forward", S_IRUGO, init_net.proc_net, - &x25_seq_forward_fops)) + if (!proc_create("x25/forward", 0444, init_net.proc_net, + &x25_seq_forward_fops)) goto out; return 0; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 8e70291e586a..175941e15a6e 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -217,7 +217,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) if (skb->len <= mtu) goto ok; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) goto ok; } @@ -350,7 +350,7 @@ static struct notifier_block xfrm_dev_notifier = { .notifier_call = xfrm_dev_event, }; -void __net_init xfrm_dev_init(void) +void __init xfrm_dev_init(void) { register_netdevice_notifier(&xfrm_dev_notifier); } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 1472c0857975..44fc54dc013c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -9,6 +9,7 @@ */ #include <linux/bottom_half.h> +#include <linux/cache.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/module.h> @@ -31,7 +32,7 @@ struct xfrm_trans_cb { #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) -static struct kmem_cache *secpath_cachep __read_mostly; +static struct kmem_cache *secpath_cachep __ro_after_init; static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index ccfdc7115a83..a00ec715aa46 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -283,7 +283,7 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name) struct crypto_comp *tfm; /* This can be any valid CPU ID so we don't need locking. 
*/ - tfm = __this_cpu_read(*pos->tfms); + tfm = this_cpu_read(*pos->tfms); if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 77d9d1ab05ce..40b54cc64243 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -51,7 +51,7 @@ static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; -static struct kmem_cache *xfrm_dst_cache __read_mostly; +static struct kmem_cache *xfrm_dst_cache __ro_after_init; static __read_mostly seqcount_t xfrm_policy_hash_generation; static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr); @@ -1458,10 +1458,13 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, static int xfrm_get_tos(const struct flowi *fl, int family) { const struct xfrm_policy_afinfo *afinfo; - int tos = 0; + int tos; afinfo = xfrm_policy_get_afinfo(family); - tos = afinfo ? afinfo->get_tos(fl) : 0; + if (!afinfo) + return 0; + + tos = afinfo->get_tos(fl); rcu_read_unlock(); @@ -1740,7 +1743,7 @@ static void xfrm_pcpu_work_fn(struct work_struct *work) void xfrm_policy_cache_flush(void) { struct xfrm_dst *old; - bool found = 0; + bool found = false; int cpu; might_sleep(); @@ -1891,7 +1894,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) spin_unlock(&pq->hold_queue.lock); dst_hold(xfrm_dst_path(dst)); - dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0); + dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) goto purge_queue; @@ -2729,14 +2732,14 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst, while (dst->xfrm) { const struct xfrm_state *xfrm = dst->xfrm; + dst = xfrm_dst_child(dst); + if (xfrm->props.mode == XFRM_MODE_TRANSPORT) continue; if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR) daddr = xfrm->coaddr; else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) daddr = &xfrm->id.daddr; - - dst = xfrm_dst_child(dst); } return daddr; } @@ -2892,8 +2895,6 @@ static int __net_init xfrm_policy_init(struct net *net) INIT_LIST_HEAD(&net->xfrm.policy_all); INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild); - if (net_eq(net, &init_net)) - xfrm_dev_init(); return 0; out_bydst: @@ -2982,7 +2983,6 @@ static void __net_exit xfrm_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm_net_ops = { .init = xfrm_net_init, .exit = xfrm_net_exit, - .async = true, }; void __init xfrm_init(void) @@ -2997,6 +2997,7 @@ void __init xfrm_init(void) INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); register_pernet_subsys(&xfrm_net_ops); + xfrm_dev_init(); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); } diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 6d5f85f4e672..ed06903cd84d 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -79,7 +79,7 @@ static const struct file_operations xfrm_statistics_seq_fops = { int __net_init xfrm_proc_init(struct net *net) { - if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net, + if (!proc_create("xfrm_stat", 0444, net->proc_net, &xfrm_statistics_seq_fops)) return -ENOMEM; return 0; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 1d38c6acf8af..9e3a5e85f828 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -660,7 +660,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff } else { 
XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; - xo->seq.low = oseq = oseq + 1; + xo->seq.low = oseq + 1; xo->seq.hi = oseq_hi; oseq += skb_shinfo(skb)->gso_segs; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 54e21f19d722..f9d2f2233f09 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2056,6 +2056,11 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen struct xfrm_mgr *km; struct xfrm_policy *pol = NULL; +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + return -EOPNOTSUPP; +#endif + if (!optval && !optlen) { xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL); xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7f52b8eb177d..080035f056d9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -121,22 +121,17 @@ static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; struct xfrm_replay_state_esn *rs; - if (p->flags & XFRM_STATE_ESN) { - if (!rt) - return -EINVAL; + if (!rt) + return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0; - rs = nla_data(rt); + rs = nla_data(rt); - if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) - return -EINVAL; - - if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && - nla_len(rt) != sizeof(*rs)) - return -EINVAL; - } + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; - if (!rt) - return 0; + if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; /* As only ESP and AH support ESN feature. */ if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
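A note on the control-port plumbing added in the nl80211.c hunks above: userspace opts in by setting NL80211_ATTR_CONTROL_PORT_OVER_NL80211 (together with NL80211_ATTR_SOCKET_OWNER) at connect/join time, after which EAPOL frames arrive as NL80211_CMD_CONTROL_PORT_FRAME unicasts on the owning socket and are transmitted back with the same command. The following is a minimal libnl-genl sketch of the TX side only, not part of the patch; the helper name, the hard-coded EAPOL ethertype, and the elided per-call error handling are illustrative assumptions.

#include <errno.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

/* Hypothetical userspace helper: push one EAPOL frame through
 * NL80211_CMD_CONTROL_PORT_FRAME. It supplies exactly the attributes
 * nl80211_tx_control_port() requires: FRAME, MAC and
 * CONTROL_PORT_ETHERTYPE (host order; the kernel does cpu_to_be16). */
static int tx_control_port(struct nl_sock *sk, int nl80211_id, int ifindex,
			   const unsigned char dest[6],
			   const void *frame, int len)
{
	struct nl_msg *msg = nlmsg_alloc();
	int err;

	if (!msg)
		return -ENOMEM;

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl80211_id, 0, 0,
		    NL80211_CMD_CONTROL_PORT_FRAME, 0);
	nla_put_u32(msg, NL80211_ATTR_IFINDEX, ifindex);
	nla_put(msg, NL80211_ATTR_MAC, 6, dest);
	nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, 0x888e);
	nla_put(msg, NL80211_ATTR_FRAME, len, frame);

	err = nl_send_auto(sk, msg);	/* kernel ends up in rdev_tx_control_port() */
	nlmsg_free(msg);
	return err < 0 ? err : 0;
}

The socket would be created with nl_socket_alloc()/genl_connect() and nl80211_id resolved via genl_ctrl_resolve(sk, "nl80211"); the kernel side then verifies NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211 and the interface type before handing the frame to the driver.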
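Similarly, for the WMM parsing added to reg.c above: each fwdb_wmm_ac packs the CWmin and CWmax exponents into the two nibbles of one byte, and ecw2cw() expands an exponent e to 2^e - 1 slots. A standalone sketch of that arithmetic, where the 0x4a test value is invented for illustration and only ecw2cw() and the nibble masks mirror the patch:

#include <stdio.h>

/* Same conversion as ecw2cw() in the reg.c hunk: exponent e -> 2^e - 1. */
static int ecw2cw(int ecw)
{
	return (1 << ecw) - 1;
}

int main(void)
{
	unsigned char ecw = 0x4a;		/* hypothetical: eCWmin = 4, eCWmax = 10 */
	int cw_min = ecw2cw((ecw & 0xf0) >> 4);	/* high nibble -> 15 */
	int cw_max = ecw2cw(ecw & 0x0f);	/* low nibble -> 1023 */

	/* valid_wmm() rejects an AC where cw_min >= cw_max or aifsn < 1 */
	printf("cw_min=%d cw_max=%d ok=%d\n", cw_min, cw_max, cw_min < cw_max);
	return 0;
}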