diff options
Diffstat (limited to 'net')
292 files changed, 9946 insertions, 6531 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 91d134961357..fbf296137b09 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -27,6 +27,7 @@ #include <linux/phy.h> #include <net/arp.h> #include <net/macsec.h> +#include <net/netdev_lock.h> #include "vlan.h" #include "vlanproc.h" @@ -273,17 +274,6 @@ static int vlan_dev_open(struct net_device *dev) goto out; } - if (dev->flags & IFF_ALLMULTI) { - err = dev_set_allmulti(real_dev, 1); - if (err < 0) - goto del_unicast; - } - if (dev->flags & IFF_PROMISC) { - err = dev_set_promiscuity(real_dev, 1); - if (err < 0) - goto clear_allmulti; - } - ether_addr_copy(vlan->real_dev_addr, real_dev->dev_addr); if (vlan->flags & VLAN_FLAG_GVRP) @@ -297,12 +287,6 @@ static int vlan_dev_open(struct net_device *dev) netif_carrier_on(dev); return 0; -clear_allmulti: - if (dev->flags & IFF_ALLMULTI) - dev_set_allmulti(real_dev, -1); -del_unicast: - if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr)) - dev_uc_del(real_dev, dev->dev_addr); out: netif_carrier_off(dev); return err; @@ -315,10 +299,6 @@ static int vlan_dev_stop(struct net_device *dev) dev_mc_unsync(real_dev, dev); dev_uc_unsync(real_dev, dev); - if (dev->flags & IFF_ALLMULTI) - dev_set_allmulti(real_dev, -1); - if (dev->flags & IFF_PROMISC) - dev_set_promiscuity(real_dev, -1); if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr)) dev_uc_del(real_dev, dev->dev_addr); @@ -377,7 +357,6 @@ static int vlan_hwtstamp_set(struct net_device *dev, static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - const struct net_device_ops *ops = real_dev->netdev_ops; struct ifreq ifrr; int err = -EOPNOTSUPP; @@ -388,8 +367,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) - err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); + err = 
dev_eth_ioctl(real_dev, &ifrr, cmd); break; } @@ -490,12 +468,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - if (dev->flags & IFF_UP) { - if (change & IFF_ALLMULTI) - dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); - } + if (change & IFF_ALLMULTI) + dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); } static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 134419667d59..a000b1ef0520 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -135,11 +135,14 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], return 0; } -static int vlan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int vlan_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct net *link_net = rtnl_newlink_link_net(params); struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; struct net_device *real_dev; unsigned int max_mtu; __be16 proto; @@ -155,7 +158,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; } - real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + real_dev = __dev_get_by_index(link_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) { NL_SET_ERR_MSG_MOD(extack, "link does not exist"); return -ENODEV; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 324e3ab96bb3..12da0269275c 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1314,6 +1314,8 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg, holding_time = 
msg->content.eg_info.holding_time; dprintk("(%s) entry = %p, holding_time = %u\n", mpc->dev->name, entry, holding_time); + if (entry == NULL && !holding_time) + return; if (entry == NULL && holding_time) { entry = mpc->eg_ops->add_entry(msg, mpc); mpc->eg_ops->put(entry); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 9f3b8b682adb..3ee7dba34310 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1270,28 +1270,18 @@ static int __must_check ax25_connect(struct socket *sock, } } - /* - * Must bind first - autobinding in this may or may not work. If - * the socket is already bound, check to see if the device has - * been filled in, error if it hasn't. - */ + /* Must bind first - autobinding does not work. */ if (sock_flag(sk, SOCK_ZAPPED)) { - /* check if we can remove this feature. It is broken. */ - printk(KERN_WARNING "ax25_connect(): %s uses autobind, please contact jreuter@yaina.de\n", - current->comm); - if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) { - kfree(digi); - goto out_release; - } + kfree(digi); + err = -EINVAL; + goto out_release; + } - ax25_fillin_cb(ax25, ax25->ax25_dev); - ax25_cb_add(ax25); - } else { - if (ax25->ax25_dev == NULL) { - kfree(digi); - err = -EHOSTUNREACH; - goto out_release; - } + /* Check to see if the device has been filled in, error if it hasn't. */ + if (ax25->ax25_dev == NULL) { + kfree(digi); + err = -EHOSTUNREACH; + goto out_release; } if (sk->sk_type == SOCK_SEQPACKET && diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 69de75db0c9c..10577434f40b 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -373,80 +373,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) return ax25_rt; } -/* - * Adjust path: If you specify a default route and want to connect - * a target on the digipeater path but w/o having a special route - * set before, the path has to be truncated from your target on. 
- */ -static inline void ax25_adjust_path(ax25_address *addr, ax25_digi *digipeat) -{ - int k; - - for (k = 0; k < digipeat->ndigi; k++) { - if (ax25cmp(addr, &digipeat->calls[k]) == 0) - break; - } - - digipeat->ndigi = k; -} - - -/* - * Find which interface to use. - */ -int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) -{ - ax25_uid_assoc *user; - ax25_route *ax25_rt; - int err = 0; - - ax25_route_lock_use(); - ax25_rt = ax25_get_route(addr, NULL); - if (!ax25_rt) { - ax25_route_lock_unuse(); - return -EHOSTUNREACH; - } - rcu_read_lock(); - if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) { - err = -EHOSTUNREACH; - goto put; - } - - user = ax25_findbyuid(current_euid()); - if (user) { - ax25->source_addr = user->call; - ax25_uid_put(user); - } else { - if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { - err = -EPERM; - goto put; - } - ax25->source_addr = *(ax25_address *)ax25->ax25_dev->dev->dev_addr; - } - - if (ax25_rt->digipeat != NULL) { - ax25->digipeat = kmemdup(ax25_rt->digipeat, sizeof(ax25_digi), - GFP_ATOMIC); - if (ax25->digipeat == NULL) { - err = -ENOMEM; - goto put; - } - ax25_adjust_path(addr, ax25->digipeat); - } - - if (ax25->sk != NULL) { - local_bh_disable(); - bh_lock_sock(ax25->sk); - sock_reset_flag(ax25->sk, SOCK_ZAPPED); - bh_unlock_sock(ax25->sk); - local_bh_enable(); - } - -put: - rcu_read_unlock(); - ax25_route_lock_unuse(); - return err; -} struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, ax25_address *dest, ax25_digi *digi) diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index b51d8b071b56..1cc9be6de456 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -19,6 +19,7 @@ batman-adv-y += hard-interface.o batman-adv-y += hash.o batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o +batman-adv-y += mesh-interface.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast_forw.o batman-adv-y 
+= netlink.o @@ -26,7 +27,6 @@ batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o batman-adv-y += routing.o batman-adv-y += send.o -batman-adv-y += soft-interface.o batman-adv-$(CONFIG_BATMAN_ADV_TRACING) += trace.o batman-adv-y += tp_meter.o batman-adv-y += translation-table.o diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index 4eee53d19eb0..c0c982b6f029 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -90,15 +90,15 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) } /** - * batadv_algo_select() - Select algorithm of soft interface - * @bat_priv: the bat priv with all the soft interface information + * batadv_algo_select() - Select algorithm of mesh interface + * @bat_priv: the bat priv with all the mesh interface information * @name: name of the algorithm to select * - * The algorithm callbacks for the soft interface will be set when the algorithm + * The algorithm callbacks for the mesh interface will be set when the algorithm * with the correct name was found. Any previous selected algorithm will not be * deinitialized and the new selected algorithm will also not be initialized. * It is therefore not allowed to call batadv_algo_select outside the creation - * function of the soft interface. + * function of the mesh interface. 
* * Return: 0 on success or negative error number in case of failure */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b12645949ae5..458879d21d66 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -23,6 +23,7 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> +#include <linux/minmax.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/netlink.h> @@ -129,7 +130,7 @@ static u8 batadv_ring_buffer_avg(const u8 lq_recv[]) /** * batadv_iv_ogm_orig_get() - retrieve or create (if does not exist) an * originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: mac address of the originator * * Return: the originator object corresponding to the passed mac address or NULL @@ -332,7 +333,7 @@ batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); const char *fwd_str; u8 packet_num; s16 buff_pos; @@ -354,7 +355,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* we might have aggregated direct link packets with an * ordinary base packet */ - if (forw_packet->direct_link_flags & BIT(packet_num) && + if (test_bit(packet_num, forw_packet->direct_link_flags) && forw_packet->if_incoming == hard_iface) batadv_ogm_packet->flags |= BATADV_DIRECTLINK; else @@ -395,20 +396,20 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* send a batman ogm packet */ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) { - struct net_device *soft_iface; + struct net_device *mesh_iface; if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface 
not specified\n"); return; } - soft_iface = forw_packet->if_incoming->soft_iface; + mesh_iface = forw_packet->if_incoming->mesh_iface; if (WARN_ON(!forw_packet->if_outgoing)) return; - if (forw_packet->if_outgoing->soft_iface != soft_iface) { - pr_warn("%s: soft interface switch for queued OGM\n", __func__); + if (forw_packet->if_outgoing->mesh_iface != mesh_iface) { + pr_warn("%s: mesh interface switch for queued OGM\n", __func__); return; } @@ -423,7 +424,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) * batadv_iv_ogm_can_aggregate() - find out if an OGM can be aggregated on an * existing forward packet * @new_bat_ogm_packet: OGM packet to be aggregated - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @packet_len: (total) length of the OGM * @send_time: timestamp (jiffies) when the packet is to be sent * @directlink: true if this is a direct link packet @@ -443,28 +444,37 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, const struct batadv_forw_packet *forw_packet) { struct batadv_ogm_packet *batadv_ogm_packet; - int aggregated_bytes = forw_packet->packet_len + packet_len; + unsigned int aggregated_bytes = forw_packet->packet_len + packet_len; struct batadv_hard_iface *primary_if = NULL; + u8 packet_num = forw_packet->num_packets; bool res = false; unsigned long aggregation_end_time; + unsigned int max_bytes; batadv_ogm_packet = (struct batadv_ogm_packet *)forw_packet->skb->data; aggregation_end_time = send_time; aggregation_end_time += msecs_to_jiffies(BATADV_MAX_AGGREGATION_MS); + max_bytes = min_t(unsigned int, if_outgoing->net_dev->mtu, + BATADV_MAX_AGGREGATION_BYTES); + /* we can aggregate the current packet to this aggregated packet * if: * * - the send time is within our MAX_AGGREGATION_MS time * - the resulting packet won't be bigger than - * MAX_AGGREGATION_BYTES + * MAX_AGGREGATION_BYTES and MTU of the 
outgoing interface + * - the number of packets is lower than MAX_AGGREGATION_PACKETS * otherwise aggregation is not possible */ if (!time_before(send_time, forw_packet->send_time) || !time_after_eq(aggregation_end_time, forw_packet->send_time)) return false; - if (aggregated_bytes > BATADV_MAX_AGGREGATION_BYTES) + if (aggregated_bytes > max_bytes) + return false; + + if (packet_num >= BATADV_MAX_AGGREGATION_PACKETS) return false; /* packet is not leaving on the same interface. */ @@ -539,16 +549,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, struct batadv_hard_iface *if_outgoing, int own_packet) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_forw_packet *forw_packet_aggr; struct sk_buff *skb; unsigned char *skb_buff; unsigned int skb_size; atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; - if (atomic_read(&bat_priv->aggregated_ogms) && - packet_len < BATADV_MAX_AGGREGATION_BYTES) - skb_size = BATADV_MAX_AGGREGATION_BYTES; + if (atomic_read(&bat_priv->aggregated_ogms)) + skb_size = max_t(unsigned int, BATADV_MAX_AGGREGATION_BYTES, + packet_len); else skb_size = packet_len; @@ -573,12 +583,13 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, memcpy(skb_buff, packet_buff, packet_len); forw_packet_aggr->own = own_packet; - forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS; + bitmap_zero(forw_packet_aggr->direct_link_flags, + BATADV_MAX_AGGREGATION_PACKETS); forw_packet_aggr->send_time = send_time; /* save packet direct link flag status */ if (direct_link) - forw_packet_aggr->direct_link_flags |= 1; + set_bit(0, forw_packet_aggr->direct_link_flags); INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, batadv_iv_send_outstanding_bat_ogm_packet); @@ -591,22 +602,20 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, const unsigned char 
*packet_buff, int packet_len, bool direct_link) { - unsigned long new_direct_link_flag; - skb_put_data(forw_packet_aggr->skb, packet_buff, packet_len); forw_packet_aggr->packet_len += packet_len; - forw_packet_aggr->num_packets++; /* save packet direct link flag status */ - if (direct_link) { - new_direct_link_flag = BIT(forw_packet_aggr->num_packets); - forw_packet_aggr->direct_link_flags |= new_direct_link_flag; - } + if (direct_link) + set_bit(forw_packet_aggr->num_packets, + forw_packet_aggr->direct_link_flags); + + forw_packet_aggr->num_packets++; } /** * batadv_iv_ogm_queue_add() - queue up an OGM for transmission - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @packet_buff: pointer to the OGM * @packet_len: (total) length of the OGM * @if_incoming: interface where the packet was received @@ -685,7 +694,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); u16 tvlv_len; if (batadv_ogm_packet->ttl <= 1) { @@ -738,7 +747,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, static void batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node; @@ -777,7 +786,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) */ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = 
netdev_priv(hard_iface->mesh_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *primary_if, *tmp_hard_iface; @@ -839,7 +848,7 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) */ rcu_read_lock(); list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) { - if (tmp_hard_iface->soft_iface != hard_iface->soft_iface) + if (tmp_hard_iface->mesh_iface != hard_iface->mesh_iface) continue; if (!kref_get_unless_zero(&tmp_hard_iface->refcount)) @@ -900,7 +909,7 @@ static u8 batadv_iv_orig_ifinfo_sum(struct batadv_orig_node *orig_node, /** * batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an * originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the orig node who originally emitted the ogm packet * @orig_ifinfo: ifinfo for the outgoing interface of the orig_node * @ethhdr: Ethernet header of the OGM @@ -1064,7 +1073,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; u8 total_count; @@ -1206,7 +1215,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, const struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_orig_node *orig_node; struct batadv_orig_ifinfo *orig_ifinfo = NULL; struct batadv_neigh_node *neigh_node; @@ -1308,7 +1317,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, 
int ogm_offset, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_hardif_neigh_node *hardif_neigh = NULL; struct batadv_neigh_node *router = NULL; struct batadv_neigh_node *router_router = NULL; @@ -1548,7 +1557,7 @@ static void batadv_iv_ogm_process_reply(struct batadv_ogm_packet *ogm_packet, static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_orig_node *orig_neigh_node, *orig_node; struct batadv_hard_iface *hard_iface; struct batadv_ogm_packet *ogm_packet; @@ -1598,7 +1607,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != if_incoming->soft_iface) + if (hard_iface->mesh_iface != if_incoming->mesh_iface) continue; if (batadv_compare_eth(ethhdr->h_source, @@ -1663,7 +1672,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -1689,7 +1698,7 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); - bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); + bat_priv = netdev_priv(forw_packet->if_incoming->mesh_iface); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { dropped = true; @@ -1720,7 +1729,7 @@ out: static int 
batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_ogm_packet *ogm_packet; u8 *packet_pos; int ogm_offset; @@ -1799,7 +1808,7 @@ batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @neigh_node: Single hops neighbour @@ -1862,7 +1871,7 @@ batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @sub_s: Number of sub entries to skip @@ -1924,7 +1933,7 @@ batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @head: Bucket to be dumped * @idx_s: Number of entries to be skipped @@ -1965,7 +1974,7 @@ batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, * batadv_iv_ogm_orig_dump() - Dump the originators into a message * @msg: Netlink message to dump into * @cb: Control block containing additional 
options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface */ static void @@ -2087,7 +2096,7 @@ batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @hard_iface: Hard interface to dump the neighbours for * @idx_s: Number of entries to skip * @@ -2124,7 +2133,7 @@ batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, * batadv_iv_ogm_neigh_dump() - Dump the neighbours into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @single_hardif: Limit dump to this hard interface */ static void @@ -2151,7 +2160,7 @@ batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, } else { list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (i_hardif++ < i_hardif_s) @@ -2235,7 +2244,7 @@ static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) /** * batadv_iv_init_sel_class() - initialize GW selection class - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv) { @@ -2390,7 +2399,7 @@ out: * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options - * @bat_priv: The bat priv with 
all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @gw_node: Gateway to be dumped * * Return: Error code, or 0 on success @@ -2465,7 +2474,7 @@ out: * batadv_iv_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information */ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index d35479c465e2..c16c2e60889d 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -43,7 +43,7 @@ static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -97,7 +97,7 @@ static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface) */ static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -166,7 +166,7 @@ batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @hard_iface: The hard interface to be dumped * @idx_s: Entries to be skipped * @@ -203,7 +203,7 @@ batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 
seq, * message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @single_hardif: Limit dumping to this hard interface */ static void @@ -228,7 +228,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, } } else { list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (i_hardif++ < i_hardif_s) @@ -254,7 +254,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @neigh_node: Single hops neighbour @@ -322,7 +322,7 @@ batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @sub_s: Number of sub entries to skip @@ -374,7 +374,7 @@ batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * 
@head: Bucket to be dumped * @idx_s: Number of entries to be skipped @@ -414,7 +414,7 @@ batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, * batadv_v_orig_dump() - Dump the originators into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface */ static void @@ -502,7 +502,7 @@ err_ifinfo1: /** * batadv_v_init_sel_class() - initialize GW selection class - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_v_init_sel_class(struct batadv_priv *bat_priv) { @@ -553,7 +553,7 @@ out: /** * batadv_v_gw_get_best_gw_node() - retrieve the best GW node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: the GW node having the best GW-metric, NULL if no GW is known */ @@ -590,7 +590,7 @@ next: /** * batadv_v_gw_is_eligible() - check if a originator would be selected as GW - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @curr_gw_orig: originator representing the currently selected GW * @orig_node: the originator representing the new candidate * @@ -647,7 +647,7 @@ out: * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @gw_node: Gateway to be dumped * * Return: Error code, or 0 on success @@ -746,7 +746,7 @@ out: * batadv_v_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block 
containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information */ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index b065578b4436..70d6778da0d7 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -82,7 +82,7 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, u32 *pthroughput) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; - struct net_device *soft_iface = hard_iface->soft_iface; + struct net_device *mesh_iface = hard_iface->mesh_iface; struct ethtool_link_ksettings link_settings; struct net_device *real_netdev; struct station_info sinfo; @@ -92,7 +92,7 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, /* don't query throughput when no longer associated with any * batman-adv interface */ - if (!soft_iface) + if (!mesh_iface) return false; /* if the user specified a customised value for this interface, then @@ -180,7 +180,7 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, default_throughput: if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) { - batadv_info(soft_iface, + batadv_info(mesh_iface, "WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. 
Consider overriding the throughput manually or checking your driver.\n", hard_iface->net_dev->name, BATADV_THROUGHPUT_DEFAULT_VALUE / 10, @@ -226,7 +226,7 @@ static bool batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); unsigned long last_tx_diff; struct sk_buff *skb; int probe_len, i; @@ -295,7 +295,7 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; @@ -476,7 +476,7 @@ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface) /* update orig field of every elp iface belonging to this mesh */ rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (primary_iface->soft_iface != hard_iface->soft_iface) + if (primary_iface->mesh_iface != hard_iface->mesh_iface) continue; batadv_v_elp_iface_activate(primary_iface, hard_iface); @@ -486,7 +486,7 @@ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface) /** * batadv_v_elp_neigh_update() - update an ELP neighbour node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @neigh_addr: the neighbour interface address * @if_incoming: the interface the packet was received through * @elp_packet: the received ELP packet @@ -552,7 +552,7 @@ orig_free: int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv 
*bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_elp_packet *elp_packet; struct batadv_hard_iface *primary_if; struct ethhdr *ethhdr; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 8f89ffe6020c..b86bb647da5b 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -45,7 +45,7 @@ /** * batadv_v_ogm_orig_get() - retrieve and possibly create an originator node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the address of the originator * * Return: the orig_node corresponding to the specified address. If such an @@ -96,7 +96,7 @@ static void batadv_v_ogm_start_queue_timer(struct batadv_hard_iface *hard_iface) /** * batadv_v_ogm_start_timer() - restart the OGM sending timer - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) { @@ -121,7 +121,7 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) static void batadv_v_ogm_send_to_if(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); if (hard_iface->if_status != BATADV_IF_ACTIVE) { kfree_skb(skb); @@ -239,7 +239,7 @@ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); if (!atomic_read(&bat_priv->aggregated_ogms)) { batadv_v_ogm_send_to_if(skb, hard_iface); @@ -256,10 +256,10 @@ static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, } /** - * batadv_v_ogm_send_softif() - periodic worker 
broadcasting the own OGM - * @bat_priv: the bat priv with all the soft interface information + * batadv_v_ogm_send_meshif() - periodic worker broadcasting the own OGM + * @bat_priv: the bat priv with all the mesh interface information */ -static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) +static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) { struct batadv_hard_iface *hard_iface; struct batadv_ogm2_packet *ogm_packet; @@ -302,7 +302,7 @@ static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) /* broadcast on every interface */ rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -373,7 +373,7 @@ static void batadv_v_ogm_send(struct work_struct *work) bat_priv = container_of(bat_v, struct batadv_priv, bat_v); mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); - batadv_v_ogm_send_softif(bat_priv); + batadv_v_ogm_send_meshif(bat_priv); mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } @@ -408,7 +408,7 @@ void batadv_v_ogm_aggr_work(struct work_struct *work) */ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); batadv_v_ogm_start_queue_timer(hard_iface); batadv_v_ogm_start_timer(bat_priv); @@ -435,7 +435,7 @@ void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface) */ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) { - struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(primary_iface->mesh_iface); struct batadv_ogm2_packet *ogm_packet; mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); @@ -452,7 +452,7 @@ unlock: /** * batadv_v_forward_penalty() - apply a penalty to the throughput 
metric * forwarded with B.A.T.M.A.N. V OGMs - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @if_incoming: the interface where the OGM has been received * @if_outgoing: the interface where the OGM has to be forwarded to * @throughput: the current throughput @@ -505,7 +505,7 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, /** * batadv_v_ogm_forward() - check conditions and forward an OGM to the given * outgoing interface - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ogm_received: previously received OGM to be forwarded * @orig_node: the originator which has been updated * @neigh_node: the neigh_node through with the OGM has been received @@ -592,7 +592,7 @@ out: /** * batadv_v_ogm_metric_update() - update route metric based on OGM - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received * @neigh_node: the neigh_node through with the OGM has been received @@ -675,7 +675,7 @@ out: /** * batadv_v_ogm_route_update() - update routes based on OGM - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ethhdr: the Ethernet header of the OGM2 * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received @@ -770,7 +770,7 @@ out: /** * batadv_v_ogm_process_per_outif() - process a batman v OGM for an outgoing if - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ethhdr: the Ethernet header of the OGM2 * @ogm2: OGM2 structure * @orig_node: Originator structure for which the OGM has been received @@ -851,7 
+851,7 @@ batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct ethhdr *ethhdr; struct batadv_orig_node *orig_node = NULL; struct batadv_hardif_neigh_node *hardif_neigh = NULL; @@ -925,7 +925,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -984,7 +984,7 @@ out: int batadv_v_ogm_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { - struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(if_incoming->mesh_iface); struct batadv_ogm2_packet *ogm_packet; struct ethhdr *ethhdr; int ogm_offset; @@ -1035,7 +1035,7 @@ free_skb: /** * batadv_v_ogm_init() - initialise the OGM2 engine - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or a negative error code in case of failure */ @@ -1070,7 +1070,7 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv) /** * batadv_v_ogm_free() - free OGM private resources - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_v_ogm_free(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 649c41f393e1..2c49b2711650 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -23,7 +23,7 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n) /** * 
batadv_bit_get_packet() - receive and process one packet within the sequence * number window - * @priv: the bat priv with all the soft interface information + * @priv: the bat priv with all the mesh interface information * @seq_bits: pointer to the sequence number receive packet * @seq_num_diff: difference between the current/received sequence number and * the last sequence number diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8c814f790d17..747755647c6a 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -209,7 +209,7 @@ static void batadv_claim_put(struct batadv_bla_claim *claim) /** * batadv_claim_hash_find() - looks for a claim in the claim hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @data: search data (may be local/static data) * * Return: claim if found or NULL otherwise. @@ -248,7 +248,7 @@ batadv_claim_hash_find(struct batadv_priv *bat_priv, /** * batadv_backbone_hash_find() - looks for a backbone gateway in the hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the address of the originator * @vid: the VLAN ID * @@ -332,7 +332,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) /** * batadv_bla_send_claim() - sends a claim frame according to the provided info - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mac: the mac address to be announced within the claim * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) 
@@ -343,7 +343,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, struct sk_buff *skb; struct ethhdr *ethhdr; struct batadv_hard_iface *primary_if; - struct net_device *soft_iface; + struct net_device *mesh_iface; u8 *hw_src; struct batadv_bla_claim_dst local_claim_dest; __be32 zeroip = 0; @@ -356,12 +356,12 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, sizeof(local_claim_dest)); local_claim_dest.type = claimtype; - soft_iface = primary_if->soft_iface; + mesh_iface = primary_if->mesh_iface; skb = arp_create(ARPOP_REPLY, ETH_P_ARP, /* IP DST: 0.0.0.0 */ zeroip, - primary_if->soft_iface, + primary_if->mesh_iface, /* IP SRC: 0.0.0.0 */ zeroip, /* Ethernet DST: Broadcast */ @@ -439,7 +439,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, } skb_reset_mac_header(skb); - skb->protocol = eth_type_trans(skb, soft_iface); + skb->protocol = eth_type_trans(skb, mesh_iface); batadv_inc_counter(bat_priv, BATADV_CNT_RX); batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, skb->len + ETH_HLEN); @@ -466,7 +466,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) report_work); bat_priv = backbone_gw->bat_priv; - batadv_info(bat_priv->soft_iface, + batadv_info(bat_priv->mesh_iface, "Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n", batadv_print_vid(backbone_gw->vid)); snprintf(vid_str, sizeof(vid_str), "%d", @@ -481,7 +481,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) /** * batadv_bla_get_backbone_gw() - finds or creates a backbone gateway - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the mac address of the originator * @vid: the VLAN ID * @own_backbone: set if the requested backbone is local @@ -554,7 +554,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, /** * 
batadv_bla_update_own_backbone_gw() - updates the own backbone gw for a VLAN - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: the selected primary interface * @vid: VLAN identifier * @@ -580,7 +580,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, /** * batadv_bla_answer_request() - answer a bla request by sending own claims - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: interface where the request came on * @vid: the vid where the request came on * @@ -657,7 +657,7 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) /** * batadv_bla_send_announce() - Send an announcement frame - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @backbone_gw: our backbone gateway which should be announced */ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, @@ -678,7 +678,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, /** * batadv_bla_add_claim() - Adds a claim in the claim hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mac: the mac address of the claim * @vid: the VLAN ID of the frame * @backbone_gw: the backbone gateway which claims it @@ -788,7 +788,7 @@ batadv_bla_claim_get_backbone_gw(struct batadv_bla_claim *claim) /** * batadv_bla_del_claim() - delete a claim from the claim hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mac: mac address of the claim to be removed * @vid: VLAN id for the claim to be removed */ @@ -826,7 +826,7 @@ free_claim: /** * batadv_handle_announce() - check for ANNOUNCE frame - * 
@bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @an_addr: announcement mac address (ARP Sender HW address) * @backbone_addr: originator address of the sender (Ethernet source MAC) * @vid: the VLAN ID of the frame @@ -884,8 +884,8 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, /** * batadv_handle_request() - check for REQUEST frame - * @bat_priv: the bat priv with all the soft interface information - * @primary_if: the primary hard interface of this batman soft interface + * @bat_priv: the bat priv with all the mesh interface information + * @primary_if: the primary hard interface of this batman mesh interface * @backbone_addr: backbone address to be requested (ARP sender HW MAC) * @ethhdr: ethernet header of a packet * @vid: the VLAN ID of the frame @@ -917,8 +917,8 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, /** * batadv_handle_unclaim() - check for UNCLAIM frame - * @bat_priv: the bat priv with all the soft interface information - * @primary_if: the primary hard interface of this batman soft interface + * @bat_priv: the bat priv with all the mesh interface information + * @primary_if: the primary hard interface of this batman mesh interface * @backbone_addr: originator address of the backbone (Ethernet source) * @claim_addr: Client to be unclaimed (ARP sender HW MAC) * @vid: the VLAN ID of the frame @@ -955,8 +955,8 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, /** * batadv_handle_claim() - check for CLAIM frame - * @bat_priv: the bat priv with all the soft interface information - * @primary_if: the primary hard interface of this batman soft interface + * @bat_priv: the bat priv with all the mesh interface information + * @primary_if: the primary hard interface of this batman mesh interface * @backbone_addr: originator address of the backbone (Ethernet Source) * @claim_addr: client mac address to be 
claimed (ARP sender HW MAC) * @vid: the VLAN ID of the frame @@ -992,7 +992,7 @@ static bool batadv_handle_claim(struct batadv_priv *bat_priv, /** * batadv_check_claim_group() - check for claim group membership - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: the primary interface of this batman interface * @hw_src: the Hardware source in the ARP Header * @hw_dst: the Hardware destination in the ARP Header @@ -1067,8 +1067,8 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, /** * batadv_bla_process_claim() - Check if this is a claim frame, and process it - * @bat_priv: the bat priv with all the soft interface information - * @primary_if: the primary hard interface of this batman soft interface + * @bat_priv: the bat priv with all the mesh interface information + * @primary_if: the primary hard interface of this batman mesh interface * @skb: the frame to be checked * * Return: true if it was a claim frame, otherwise return false to @@ -1210,7 +1210,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, /** * batadv_bla_purge_backbone_gw() - Remove backbone gateways after a timeout or * immediately - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @now: whether the whole hash shall be wiped now * * Check when we last heard from other nodes, and remove them in case of @@ -1262,7 +1262,7 @@ purge_now: /** * batadv_bla_purge_claims() - Remove claims after a timeout or immediately - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: the selected primary interface, may be NULL if now is set * @now: whether the whole hash shall be wiped now * @@ -1321,7 +1321,7 @@ skip: /** * batadv_bla_update_orig_address() - Update the backbone gateways when the own * 
originator address changes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: the new selected primary_if * @oldif: the old primary interface, may be NULL */ @@ -1376,7 +1376,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, /** * batadv_bla_send_loopdetect() - send a loopdetect frame - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @backbone_gw: the backbone gateway for which a loop should be detected * * To detect loops that the bridge loop avoidance can't handle, send a loop @@ -1396,7 +1396,7 @@ batadv_bla_send_loopdetect(struct batadv_priv *bat_priv, /** * batadv_bla_status_update() - purge bla interfaces if necessary - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device */ void batadv_bla_status_update(struct net_device *net_dev) { @@ -1520,7 +1520,7 @@ static struct lock_class_key batadv_backbone_hash_lock_class_key; /** * batadv_bla_init() - initialize all bla structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success, < 0 on error. */ @@ -1586,7 +1586,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv) /** * batadv_bla_check_duplist() - Check if a frame is in the broadcast dup. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: contains the multicast packet to be checked * @payload_ptr: pointer to position inside the head buffer of the skb * marking the start of the data to be CRC'ed @@ -1680,7 +1680,7 @@ out: /** * batadv_bla_check_ucast_duplist() - Check if a frame is in the broadcast dup. 
- * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: contains the multicast packet to be checked, decapsulated from a * unicast_packet * @@ -1698,7 +1698,7 @@ static bool batadv_bla_check_ucast_duplist(struct batadv_priv *bat_priv, /** * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: contains the bcast_packet to be checked * * Check if it is on our broadcast list. Another gateway might have sent the @@ -1723,7 +1723,7 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, /** * batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for * the VLAN identified by vid. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: originator mac address * @vid: VLAN identifier * @@ -1766,7 +1766,7 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, * @orig_node: the orig_node of the frame * @hdr_size: maximum length of the frame * - * Return: true if the orig_node is also a gateway on the soft interface, + * Return: true if the orig_node is also a gateway on the mesh interface, * otherwise it returns false. 
*/ bool batadv_bla_is_backbone_gw(struct sk_buff *skb, @@ -1796,9 +1796,9 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb, /** * batadv_bla_free() - free all bla structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * - * for softinterface free or module unload + * for meshinterface free or module unload */ void batadv_bla_free(struct batadv_priv *bat_priv) { @@ -1822,7 +1822,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv) /** * batadv_bla_loopdetect_check() - check and handle a detected loop - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the packet to check * @primary_if: interface where the request came on * @vid: the VLAN ID of the frame @@ -1877,7 +1877,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_bla_rx() - check packets coming from the mesh. 
- * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame * @packet_type: the batman packet type this frame came in @@ -2010,7 +2010,7 @@ out: /** * batadv_bla_tx() - check packets going into the mesh - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame * @@ -2232,18 +2232,18 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; int portid = NETLINK_CB(cb->skb).portid; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_hashtable *hash; struct batadv_priv *bat_priv; int bucket = cb->args[0]; int idx = cb->args[1]; int ret = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); hash = bat_priv->bla.claim_hash; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -2267,7 +2267,7 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); return ret; } @@ -2393,18 +2393,18 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; int portid = NETLINK_CB(cb->skb).portid; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_hashtable *hash; struct batadv_priv *bat_priv; int bucket = cb->args[0]; int idx = cb->args[1]; int ret = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = 
batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); hash = bat_priv->bla.backbone_hash; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -2428,7 +2428,7 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); return ret; } @@ -2437,7 +2437,7 @@ out: /** * batadv_bla_check_claim() - check if address is claimed * - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: mac address of which the claim status is checked * @vid: the VLAN ID * diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index e5a07152d4ec..8b8132eb0a79 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -96,7 +96,7 @@ static void batadv_dat_purge(struct work_struct *work); /** * batadv_dat_start_timer() - initialise the DAT periodic worker - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) { @@ -145,7 +145,7 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) /** * __batadv_dat_purge() - delete entries from the DAT local storage - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @to_purge: function in charge to decide whether an entry has to be purged or * not. 
This function takes the dat_entry as argument and has to * returns a boolean value: true is the entry has to be deleted, @@ -315,7 +315,7 @@ static u32 batadv_hash_dat(const void *data, u32 size) /** * batadv_dat_entry_hash_find() - look for a given dat_entry in the local hash * table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ip: search key * @vid: VLAN identifier * @@ -357,7 +357,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, /** * batadv_dat_entry_add() - add a new dat entry or update it if already exists - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ip: ipv4 to add/edit * @mac_addr: mac address to assign to the given ipv4 * @vid: VLAN identifier @@ -414,7 +414,7 @@ out: /** * batadv_dbg_arp() - print a debug message containing all the ARP packet * details - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * @msg: message to print together with the debugging information @@ -549,7 +549,7 @@ out: /** * batadv_choose_next_candidate() - select the next DHT candidate - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @cands: candidates array * @select: number of candidates already present in the array * @ip_key: key to look up in the DHT @@ -613,7 +613,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, /** * batadv_dat_select_candidates() - select the nodes which the DHT message has * to be sent to - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ip_dst: ipv4 to look up in the DHT * @vid: VLAN 
identifier * @@ -658,7 +658,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, /** * batadv_dat_forward_data() - copy and send payload to the selected candidates - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @ip: the DHT key * @vid: VLAN identifier @@ -734,7 +734,7 @@ free_orig: /** * batadv_dat_tvlv_container_update() - update the dat tvlv container after dat * setting change - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv) { @@ -756,7 +756,7 @@ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv) /** * batadv_dat_status_update() - update the dat tvlv container after dat * setting change - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device */ void batadv_dat_status_update(struct net_device *net_dev) { @@ -767,7 +767,7 @@ void batadv_dat_status_update(struct net_device *net_dev) /** * batadv_dat_tvlv_ogm_handler_v1() - process incoming dat tvlv container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data @@ -786,7 +786,7 @@ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /** * batadv_dat_hash_free() - free the local DAT hash table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_dat_hash_free(struct batadv_priv *bat_priv) { @@ -802,7 +802,7 @@ static void batadv_dat_hash_free(struct batadv_priv *bat_priv) /** * batadv_dat_init() - 
initialise the DAT internals - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 in case of success, a negative error code otherwise */ @@ -828,7 +828,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv) /** * batadv_dat_free() - free the DAT internals - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_dat_free(struct batadv_priv *bat_priv) { @@ -936,18 +936,18 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; int portid = NETLINK_CB(cb->skb).portid; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_hashtable *hash; struct batadv_priv *bat_priv; int bucket = cb->args[0]; int idx = cb->args[1]; int ret = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); hash = bat_priv->dat.hash; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -973,14 +973,14 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); return ret; } /** * batadv_arp_get_type() - parse an ARP packet and gets the type - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to analyse * @hdr_size: size of the possible header before the ARP packet in the skb * @@ -1080,7 +1080,7 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) /** * batadv_dat_arp_create_reply() - create an ARP Reply - * @bat_priv: the bat priv with all the soft interface 
information + * @bat_priv: the bat priv with all the mesh interface information * @ip_src: ARP sender IP * @ip_dst: ARP target IP * @hw_src: Ethernet source and ARP sender MAC @@ -1099,7 +1099,7 @@ batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src, { struct sk_buff *skb; - skb = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_dst, bat_priv->soft_iface, + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_dst, bat_priv->mesh_iface, ip_src, hw_dst, hw_src, hw_dst); if (!skb) return NULL; @@ -1116,7 +1116,7 @@ batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src, /** * batadv_dat_snoop_outgoing_arp_request() - snoop the ARP request and try to * answer using DAT - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check * * Return: true if the message has been sent to the dht candidates, false @@ -1132,7 +1132,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; - struct net_device *soft_iface = bat_priv->soft_iface; + struct net_device *mesh_iface = bat_priv->mesh_iface; int hdr_size = 0; unsigned short vid; @@ -1162,7 +1162,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, * client will answer itself. DAT would only generate a * duplicate packet. * - * Moreover, if the soft-interface is enslaved into a bridge, an + * Moreover, if the mesh-interface is enslaved into a bridge, an * additional DAT answer may trigger kernel warnings about * a packet coming from the wrong port. 
*/ @@ -1191,7 +1191,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; - skb_new->protocol = eth_type_trans(skb_new, soft_iface); + skb_new->protocol = eth_type_trans(skb_new, mesh_iface); batadv_inc_counter(bat_priv, BATADV_CNT_RX); batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, @@ -1213,7 +1213,7 @@ out: /** * batadv_dat_snoop_incoming_arp_request() - snoop the ARP request and try to * answer using the local DAT storage - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check * @hdr_size: size of the encapsulation header * @@ -1281,7 +1281,7 @@ out: /** * batadv_dat_snoop_outgoing_arp_reply() - snoop the ARP reply and fill the DHT - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check */ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, @@ -1324,7 +1324,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, /** * batadv_dat_snoop_incoming_arp_reply() - snoop the ARP reply and fill the * local DAT storage only - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check * @hdr_size: size of the encapsulation header * @@ -1605,7 +1605,7 @@ static bool batadv_dat_get_dhcp_chaddr(struct sk_buff *skb, u8 *buf) /** * batadv_dat_put_dhcp() - puts addresses from a DHCP packet into the DHT and * DAT cache - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @chaddr: the DHCP client MAC address * @yiaddr: the DHCP client IP address * @hw_dst: the DHCP server MAC address @@ -1690,7 +1690,7 @@ batadv_dat_check_dhcp_ack(struct sk_buff *skb, __be16 proto, __be32 *ip_src, /** * 
batadv_dat_snoop_outgoing_dhcp_ack() - snoop DHCPACK and fill DAT with it - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the packet to snoop * @proto: ethernet protocol hint (behind a potential vlan) * @vid: VLAN identifier @@ -1723,7 +1723,7 @@ void batadv_dat_snoop_outgoing_dhcp_ack(struct batadv_priv *bat_priv, /** * batadv_dat_snoop_incoming_dhcp_ack() - snoop DHCPACK and fill DAT cache - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the packet to snoop * @hdr_size: header size, up to the tail of the batman-adv header * @@ -1771,7 +1771,7 @@ void batadv_dat_snoop_incoming_dhcp_ack(struct batadv_priv *bat_priv, /** * batadv_dat_drop_broadcast_packet() - check if an ARP request has to be * dropped (because the node has already obtained the reply via DAT) or not - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @forw_packet: the broadcast packet * * Return: true if the node can drop the packet, false otherwise. 
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index bed7f3d20844..e7b75e82eb1d 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -56,7 +56,7 @@ batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node) /** * batadv_dat_init_own_addr() - assign a DAT address to the node itself - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @primary_if: a pointer to the primary interface */ static inline void @@ -77,7 +77,7 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb); /** * batadv_dat_inc_counter() - increment the correct DAT packet counter - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @subtype: the 4addr subtype of the packet to be counted * * Updates the ethtool statistics for the received packet if it is a DAT subtype diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 757c084ac2d1..cc14bc41381e 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -349,7 +349,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_hard_iface *recv_if, struct batadv_orig_node *orig_node_src) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_neigh_node *neigh_node = NULL; struct batadv_frag_packet *packet; u16 total_size; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index f68e34ed1f62..7a11b245e9f4 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -71,7 +71,7 @@ void batadv_gw_node_release(struct kref *ref) /** * batadv_gw_get_selected_gw_node() - Get currently selected gateway - * @bat_priv: the bat priv with all the soft interface information 
+ * @bat_priv: the bat priv with all the mesh interface information * * Return: selected gateway (with increased refcnt), NULL on errors */ @@ -95,7 +95,7 @@ out: /** * batadv_gw_get_selected_orig() - Get originator of currently selected gateway - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: orig_node of selected gateway (with increased refcnt), NULL on errors */ @@ -144,7 +144,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, /** * batadv_gw_reselect() - force a gateway reselection - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Set a flag to remind the GW component to perform a new gateway reselection. * However this function does not ensure that the current gateway is going to be @@ -160,7 +160,7 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv) /** * batadv_gw_check_client_stop() - check if client mode has been switched off - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * This function assumes the caller has checked that the gw state *is actually * changing*. 
This function is not supposed to be called when there is no state @@ -192,7 +192,7 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv) /** * batadv_gw_election() - Elect the best gateway - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_election(struct batadv_priv *bat_priv) { @@ -280,7 +280,7 @@ out: /** * batadv_gw_check_election() - Elect orig node as best gateway when eligible - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked */ void batadv_gw_check_election(struct batadv_priv *bat_priv, @@ -314,7 +314,7 @@ out: /** * batadv_gw_node_add() - add gateway node to list of available gateways - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: originator announcing gateway capabilities * @gateway: announced bandwidth information * @@ -361,7 +361,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, /** * batadv_gw_node_get() - retrieve gateway node from list of available gateways - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: originator announcing gateway capabilities * * Return: gateway node if found or NULL otherwise. 
@@ -391,7 +391,7 @@ struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, /** * batadv_gw_node_update() - update list of available gateways with changed * bandwidth information - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: originator announcing gateway capabilities * @gateway: announced bandwidth information */ @@ -458,7 +458,7 @@ out: /** * batadv_gw_node_delete() - Remove orig_node from gateway list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is currently in process of being removed */ void batadv_gw_node_delete(struct batadv_priv *bat_priv, @@ -473,8 +473,8 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, } /** - * batadv_gw_node_free() - Free gateway information from soft interface - * @bat_priv: the bat priv with all the soft interface information + * batadv_gw_node_free() - Free gateway information from mesh interface + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_node_free(struct batadv_priv *bat_priv) { @@ -501,15 +501,15 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; int ret; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { @@ -528,7 +528,7 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback 
*cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); return ret; } @@ -657,7 +657,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, /** * batadv_gw_out_of_range() - check if the dhcp request destination is the best * gateway - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the outgoing packet * * Check if the skb is a DHCP request and if it is sent to the current best GW diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 2dd36ef03c84..315fa90f0c94 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -20,7 +20,7 @@ /** * batadv_gw_tvlv_container_update() - update the gw tvlv container after * gateway setting change - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) { @@ -48,7 +48,7 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) /** * batadv_gw_tvlv_ogm_handler_v1() - process incoming gateway tvlv container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data @@ -89,7 +89,7 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /** * batadv_gw_init() - initialise the gateway handling internals - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_init(struct batadv_priv *bat_priv) { @@ -105,7 +105,7 @@ void batadv_gw_init(struct batadv_priv *bat_priv) /** * batadv_gw_free() - free the gateway handling internals 
- * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_gw_free(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 96a412beab2d..f145f9662653 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -36,9 +36,9 @@ #include "distributed-arp-table.h" #include "gateway_client.h" #include "log.h" +#include "mesh-interface.h" #include "originator.h" #include "send.h" -#include "soft-interface.h" #include "translation-table.h" /** @@ -51,7 +51,7 @@ void batadv_hardif_release(struct kref *ref) struct batadv_hard_iface *hard_iface; hard_iface = container_of(ref, struct batadv_hard_iface, refcount); - dev_put(hard_iface->net_dev); + netdev_put(hard_iface->net_dev, &hard_iface->dev_tracker); kfree_rcu(hard_iface, rcu); } @@ -141,7 +141,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1, * is important to prevent this new interface from being used to create a new * mesh network (this behaviour would lead to a batman-over-batman * configuration). This function recursively checks all the fathers of the - * device passed as argument looking for a batman-adv soft interface. + * device passed as argument looking for a batman-adv mesh interface. 
* * Return: true if the device is descendant of a batman-adv mesh interface (or * if it is a batman-adv interface itself), false otherwise @@ -155,7 +155,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) bool ret; /* check if this is a batman-adv mesh interface */ - if (batadv_softif_is_valid(net_dev)) + if (batadv_meshif_is_valid(net_dev)) return true; iflink = dev_get_iflink(net_dev); @@ -233,10 +233,10 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) } hard_iface = batadv_hardif_get_by_netdev(netdev); - if (!hard_iface || !hard_iface->soft_iface) + if (!hard_iface || !hard_iface->mesh_iface) goto out; - net = dev_net(hard_iface->soft_iface); + net = dev_net(hard_iface->mesh_iface); real_net = batadv_getlink_net(netdev, net); /* iflink to itself, most likely physical device */ @@ -438,13 +438,13 @@ out: } static struct batadv_hard_iface * -batadv_hardif_get_active(const struct net_device *soft_iface) +batadv_hardif_get_active(const struct net_device *mesh_iface) { struct batadv_hard_iface *hard_iface; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; if (hard_iface->if_status == BATADV_IF_ACTIVE && @@ -532,9 +532,9 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev) /** * batadv_hardif_recalc_extra_skbroom() - Recalculate skbuff extra head/tailroom - * @soft_iface: netdev struct of the mesh interface + * @mesh_iface: netdev struct of the mesh interface */ -static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) +static void batadv_hardif_recalc_extra_skbroom(struct net_device *mesh_iface) { const struct batadv_hard_iface *hard_iface; unsigned short lower_header_len = ETH_HLEN; @@ -547,7 +547,7 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) 
continue; - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; lower_header_len = max_t(unsigned short, lower_header_len, @@ -567,20 +567,20 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) /* fragmentation headers don't strip the unicast/... header */ needed_headroom += sizeof(struct batadv_frag_packet); - soft_iface->needed_headroom = needed_headroom; - soft_iface->needed_tailroom = lower_tailroom; + mesh_iface->needed_headroom = needed_headroom; + mesh_iface->needed_tailroom = lower_tailroom; } /** - * batadv_hardif_min_mtu() - Calculate maximum MTU for soft interface - * @soft_iface: netdev struct of the soft interface + * batadv_hardif_min_mtu() - Calculate maximum MTU for mesh interface + * @mesh_iface: netdev struct of the mesh interface * - * Return: MTU for the soft-interface (limited by the minimal MTU of all active + * Return: MTU for the mesh-interface (limited by the minimal MTU of all active * slave interfaces) */ -int batadv_hardif_min_mtu(struct net_device *soft_iface) +int batadv_hardif_min_mtu(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); const struct batadv_hard_iface *hard_iface; int min_mtu = INT_MAX; @@ -590,7 +590,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; min_mtu = min_t(int, hard_iface->net_dev->mtu, min_mtu); @@ -616,26 +616,24 @@ out: */ atomic_set(&bat_priv->packet_size_max, min_mtu); - /* the real soft-interface MTU is computed by removing the payload + /* the real mesh-interface MTU is computed by removing the payload * overhead from the maximum amount of bytes that was just computed. 
- * - * However batman-adv does not support MTUs bigger than ETH_DATA_LEN */ - return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN); + return min_t(int, min_mtu - batadv_max_header_len(), BATADV_MAX_MTU); } /** * batadv_update_min_mtu() - Adjusts the MTU if a new interface with a smaller * MTU appeared - * @soft_iface: netdev struct of the soft interface + * @mesh_iface: netdev struct of the mesh interface */ -void batadv_update_min_mtu(struct net_device *soft_iface) +void batadv_update_min_mtu(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); int limit_mtu; int mtu; - mtu = batadv_hardif_min_mtu(soft_iface); + mtu = batadv_hardif_min_mtu(mesh_iface); if (bat_priv->mtu_set_by_user) limit_mtu = bat_priv->mtu_set_by_user; @@ -643,12 +641,12 @@ void batadv_update_min_mtu(struct net_device *soft_iface) limit_mtu = ETH_DATA_LEN; mtu = min(mtu, limit_mtu); - dev_set_mtu(soft_iface, mtu); + dev_set_mtu(mesh_iface, mtu); /* Check if the local translate table should be cleaned up to match a * new (and smaller) MTU. 
*/ - batadv_tt_local_resize_to_mtu(soft_iface); + batadv_tt_local_resize_to_mtu(mesh_iface); } static void @@ -660,7 +658,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); hard_iface->if_status = BATADV_IF_TO_BE_ACTIVATED; @@ -672,10 +670,10 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) if (!primary_if) batadv_primary_if_select(bat_priv, hard_iface); - batadv_info(hard_iface->soft_iface, "Interface activated: %s\n", + batadv_info(hard_iface->mesh_iface, "Interface activated: %s\n", hard_iface->net_dev->name); - batadv_update_min_mtu(hard_iface->soft_iface); + batadv_update_min_mtu(hard_iface->mesh_iface); if (bat_priv->algo_ops->iface.activate) bat_priv->algo_ops->iface.activate(hard_iface); @@ -693,21 +691,21 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) hard_iface->if_status = BATADV_IF_INACTIVE; - batadv_info(hard_iface->soft_iface, "Interface deactivated: %s\n", + batadv_info(hard_iface->mesh_iface, "Interface deactivated: %s\n", hard_iface->net_dev->name); - batadv_update_min_mtu(hard_iface->soft_iface); + batadv_update_min_mtu(hard_iface->mesh_iface); } /** - * batadv_hardif_enable_interface() - Enslave hard interface to soft interface - * @hard_iface: hard interface to add to soft interface - * @soft_iface: netdev struct of the mesh interface + * batadv_hardif_enable_interface() - Enslave hard interface to mesh interface + * @hard_iface: hard interface to add to mesh interface + * @mesh_iface: netdev struct of the mesh interface * * Return: 0 on success or negative error number in case of failure */ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, - struct net_device *soft_iface) + struct net_device *mesh_iface) { struct batadv_priv *bat_priv; 
__be16 ethertype = htons(ETH_P_BATMAN); @@ -717,7 +715,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, int ret; hardif_mtu = READ_ONCE(hard_iface->net_dev->mtu); - required_mtu = READ_ONCE(soft_iface->mtu) + max_header_len; + required_mtu = READ_ONCE(mesh_iface->mtu) + max_header_len; if (hardif_mtu < ETH_MIN_MTU + max_header_len) return -EINVAL; @@ -727,12 +725,13 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, kref_get(&hard_iface->refcount); - dev_hold(soft_iface); - hard_iface->soft_iface = soft_iface; - bat_priv = netdev_priv(hard_iface->soft_iface); + dev_hold(mesh_iface); + netdev_hold(mesh_iface, &hard_iface->meshif_dev_tracker, GFP_ATOMIC); + hard_iface->mesh_iface = mesh_iface; + bat_priv = netdev_priv(hard_iface->mesh_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, - soft_iface, NULL, NULL, NULL); + mesh_iface, NULL, NULL, NULL); if (ret) goto err_dev; @@ -748,19 +747,19 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; dev_add_pack(&hard_iface->batman_adv_ptype); - batadv_info(hard_iface->soft_iface, "Adding interface: %s\n", + batadv_info(hard_iface->mesh_iface, "Adding interface: %s\n", hard_iface->net_dev->name); if (atomic_read(&bat_priv->fragmentation) && hardif_mtu < required_mtu) - batadv_info(hard_iface->soft_iface, + batadv_info(hard_iface->mesh_iface, "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n", hard_iface->net_dev->name, hardif_mtu, required_mtu); if (!atomic_read(&bat_priv->fragmentation) && hardif_mtu < required_mtu) - batadv_info(hard_iface->soft_iface, + batadv_info(hard_iface->mesh_iface, "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. 
If you experience problems getting traffic through try increasing the MTU to %i.\n", hard_iface->net_dev->name, hardif_mtu, required_mtu); @@ -768,11 +767,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, if (batadv_hardif_is_iface_up(hard_iface)) batadv_hardif_activate_interface(hard_iface); else - batadv_err(hard_iface->soft_iface, + batadv_err(hard_iface->mesh_iface, "Not using interface %s (retrying later): interface not active\n", hard_iface->net_dev->name); - batadv_hardif_recalc_extra_skbroom(soft_iface); + batadv_hardif_recalc_extra_skbroom(mesh_iface); if (bat_priv->algo_ops->iface.enabled) bat_priv->algo_ops->iface.enabled(hard_iface); @@ -781,17 +780,17 @@ out: return 0; err_upper: - netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface); + netdev_upper_dev_unlink(hard_iface->net_dev, mesh_iface); err_dev: - hard_iface->soft_iface = NULL; - dev_put(soft_iface); + hard_iface->mesh_iface = NULL; + netdev_put(mesh_iface, &hard_iface->meshif_dev_tracker); batadv_hardif_put(hard_iface); return ret; } /** - * batadv_hardif_cnt() - get number of interfaces enslaved to soft interface - * @soft_iface: soft interface to check + * batadv_hardif_cnt() - get number of interfaces enslaved to mesh interface + * @mesh_iface: mesh interface to check * * This function is only using RCU for locking - the result can therefore be * off when another function is modifying the list at the same time. 
The @@ -799,14 +798,14 @@ err_dev: * * Return: number of connected/enslaved hard interfaces */ -static size_t batadv_hardif_cnt(const struct net_device *soft_iface) +static size_t batadv_hardif_cnt(const struct net_device *mesh_iface) { struct batadv_hard_iface *hard_iface; size_t count = 0; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; count++; @@ -817,12 +816,12 @@ static size_t batadv_hardif_cnt(const struct net_device *soft_iface) } /** - * batadv_hardif_disable_interface() - Remove hard interface from soft interface + * batadv_hardif_disable_interface() - Remove hard interface from mesh interface * @hard_iface: hard interface to be removed */ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if = NULL; batadv_hardif_deactivate_interface(hard_iface); @@ -830,7 +829,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; - batadv_info(hard_iface->soft_iface, "Removing interface: %s\n", + batadv_info(hard_iface->mesh_iface, "Removing interface: %s\n", hard_iface->net_dev->name); dev_remove_pack(&hard_iface->batman_adv_ptype); batadv_hardif_put(hard_iface); @@ -839,7 +838,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) if (hard_iface == primary_if) { struct batadv_hard_iface *new_if; - new_if = batadv_hardif_get_active(hard_iface->soft_iface); + new_if = batadv_hardif_get_active(hard_iface->mesh_iface); batadv_primary_if_select(bat_priv, new_if); batadv_hardif_put(new_if); @@ -851,16 +850,16 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) /* delete all references to this hard_iface */ 
batadv_purge_orig_ref(bat_priv); batadv_purge_outstanding_packets(bat_priv, hard_iface); - dev_put(hard_iface->soft_iface); + netdev_put(hard_iface->mesh_iface, &hard_iface->meshif_dev_tracker); - netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface); - batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface); + netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->mesh_iface); + batadv_hardif_recalc_extra_skbroom(hard_iface->mesh_iface); /* nobody uses this interface anymore */ - if (batadv_hardif_cnt(hard_iface->soft_iface) <= 1) + if (batadv_hardif_cnt(hard_iface->mesh_iface) <= 1) batadv_gw_check_client_stop(bat_priv); - hard_iface->soft_iface = NULL; + hard_iface->mesh_iface = NULL; batadv_hardif_put(hard_iface); out: @@ -875,16 +874,16 @@ batadv_hardif_add_interface(struct net_device *net_dev) ASSERT_RTNL(); if (!batadv_is_valid_iface(net_dev)) - goto out; - - dev_hold(net_dev); + return NULL; hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) - goto release_dev; + return NULL; + netdev_hold(net_dev, &hard_iface->dev_tracker, GFP_ATOMIC); hard_iface->net_dev = net_dev; - hard_iface->soft_iface = NULL; + + hard_iface->mesh_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; INIT_LIST_HEAD(&hard_iface->list); @@ -909,11 +908,6 @@ batadv_hardif_add_interface(struct net_device *net_dev) batadv_hardif_generation++; return hard_iface; - -release_dev: - dev_put(net_dev); -out: - return NULL; } static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) @@ -932,13 +926,13 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) } /** - * batadv_hard_if_event_softif() - Handle events for soft interfaces + * batadv_hard_if_event_meshif() - Handle events for mesh interfaces * @event: NETDEV_* event to handle * @net_dev: net_device which generated an event * * Return: NOTIFY_* result */ -static int batadv_hard_if_event_softif(unsigned long event, +static int 
batadv_hard_if_event_meshif(unsigned long event, struct net_device *net_dev) { struct batadv_priv *bat_priv; @@ -946,7 +940,7 @@ static int batadv_hard_if_event_softif(unsigned long event, switch (event) { case NETDEV_REGISTER: bat_priv = netdev_priv(net_dev); - batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); + batadv_meshif_create_vlan(bat_priv, BATADV_NO_FLAGS); break; } @@ -961,8 +955,8 @@ static int batadv_hard_if_event(struct notifier_block *this, struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; - if (batadv_softif_is_valid(net_dev)) - return batadv_hard_if_event_softif(event, net_dev); + if (batadv_meshif_is_valid(net_dev)) + return batadv_hard_if_event_meshif(event, net_dev); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && (event == NETDEV_REGISTER || @@ -988,8 +982,8 @@ static int batadv_hard_if_event(struct notifier_block *this, batadv_hardif_remove_interface(hard_iface); break; case NETDEV_CHANGEMTU: - if (hard_iface->soft_iface) - batadv_update_min_mtu(hard_iface->soft_iface); + if (hard_iface->mesh_iface) + batadv_update_min_mtu(hard_iface->mesh_iface); break; case NETDEV_CHANGEADDR: if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) @@ -997,7 +991,7 @@ static int batadv_hard_if_event(struct notifier_block *this, batadv_check_known_mac_addr(hard_iface->net_dev); - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); primary_if = batadv_primary_if_get_selected(bat_priv); diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 64f660dbbe54..262a78364742 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -23,12 +23,12 @@ enum batadv_hard_if_state { /** * @BATADV_IF_NOT_IN_USE: interface is not used as slave interface of a - * batman-adv soft interface + * batman-adv mesh interface */ BATADV_IF_NOT_IN_USE, /** - * @BATADV_IF_TO_BE_REMOVED: 
interface will be removed from soft + * @BATADV_IF_TO_BE_REMOVED: interface will be removed from mesh * interface */ BATADV_IF_TO_BE_REMOVED, @@ -74,10 +74,10 @@ bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface); struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, - struct net_device *soft_iface); + struct net_device *mesh_iface); void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface); -int batadv_hardif_min_mtu(struct net_device *soft_iface); -void batadv_update_min_mtu(struct net_device *soft_iface); +int batadv_hardif_min_mtu(struct net_device *mesh_iface); +void batadv_update_min_mtu(struct net_device *mesh_iface); void batadv_hardif_release(struct kref *ref); int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, u8 *orig_addr, u8 *orig_neigh); @@ -97,7 +97,7 @@ static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface) /** * batadv_primary_if_get_selected() - Get reference to primary interface - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: primary interface (with increased refcnt), otherwise NULL */ diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index 7a93a1e94c40..c19d07eeb070 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -13,7 +13,7 @@ /** * batadv_debug_log() - Add debug log entry - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @fmt: format string * * Return: 0 on success or negative error number in case of failure diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 6717c965f0fa..567afaa8df99 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -71,7 +71,7 @@ __printf(2, 3); /** * _batadv_dbg() - Store debug output with(out) rate limiting * @type: 
type of debug message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ratelimited: whether output should be rate limited * @fmt: format string * @arg: variable arguments @@ -97,7 +97,7 @@ static inline void _batadv_dbg(int type __always_unused, /** * batadv_dbg() - Store debug output without rate limiting * @type: type of debug message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @arg: format string and variable arguments */ #define batadv_dbg(type, bat_priv, arg...) \ @@ -106,7 +106,7 @@ static inline void _batadv_dbg(int type __always_unused, /** * batadv_dbg_ratelimited() - Store debug output with rate limiting * @type: type of debug message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @arg: format string and variable arguments */ #define batadv_dbg_ratelimited(type, bat_priv, arg...) 
\ @@ -114,7 +114,7 @@ static inline void _batadv_dbg(int type __always_unused, /** * batadv_info() - Store message in debug buffer and print it to kmsg buffer - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device * @fmt: format string * @arg: variable arguments */ @@ -128,7 +128,7 @@ static inline void _batadv_dbg(int type __always_unused, /** * batadv_err() - Store error in debug buffer and print it to kmsg buffer - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device * @fmt: format string * @arg: variable arguments */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 333e947afcce..a08132888a3d 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -51,13 +51,13 @@ #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "mesh-interface.h" #include "multicast.h" #include "netlink.h" #include "network-coding.h" #include "originator.h" #include "routing.h" #include "send.h" -#include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" @@ -143,14 +143,14 @@ static void __exit batadv_exit(void) } /** - * batadv_mesh_init() - Initialize soft interface - * @soft_iface: netdev struct of the soft interface + * batadv_mesh_init() - Initialize mesh interface + * @mesh_iface: netdev struct of the mesh interface * * Return: 0 on success or negative error number in case of failure */ -int batadv_mesh_init(struct net_device *soft_iface) +int batadv_mesh_init(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); int ret; spin_lock_init(&bat_priv->forw_bat_list_lock); @@ -167,7 +167,7 @@ int batadv_mesh_init(struct net_device *soft_iface) #endif spin_lock_init(&bat_priv->tvlv.container_list_lock); spin_lock_init(&bat_priv->tvlv.handler_list_lock); - spin_lock_init(&bat_priv->softif_vlan_list_lock); + spin_lock_init(&bat_priv->meshif_vlan_list_lock); 
spin_lock_init(&bat_priv->tp_list_lock); INIT_HLIST_HEAD(&bat_priv->forw_bat_list); @@ -186,7 +186,7 @@ int batadv_mesh_init(struct net_device *soft_iface) #endif INIT_HLIST_HEAD(&bat_priv->tvlv.container_list); INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); - INIT_HLIST_HEAD(&bat_priv->softif_vlan_list); + INIT_HLIST_HEAD(&bat_priv->meshif_vlan_list); INIT_HLIST_HEAD(&bat_priv->tp_list); bat_priv->gw.generation = 0; @@ -253,12 +253,12 @@ err_orig: } /** - * batadv_mesh_free() - Deinitialize soft interface - * @soft_iface: netdev struct of the soft interface + * batadv_mesh_free() - Deinitialize mesh interface + * @mesh_iface: netdev struct of the mesh interface */ -void batadv_mesh_free(struct net_device *soft_iface) +void batadv_mesh_free(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); @@ -297,7 +297,7 @@ void batadv_mesh_free(struct net_device *soft_iface) /** * batadv_is_my_mac() - check if the given mac address belongs to any of the * real interfaces in the current mesh - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the address to check * * Return: 'true' if the mac address was found, false otherwise. 
@@ -312,7 +312,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) { @@ -457,10 +457,10 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, if (unlikely(skb->mac_len != ETH_HLEN || !skb_mac_header(skb))) goto err_free; - if (!hard_iface->soft_iface) + if (!hard_iface->mesh_iface) goto err_free; - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto err_free; @@ -651,7 +651,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) /** * batadv_vlan_ap_isola_get() - return AP isolation status for the given vlan - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier for which the AP isolation attributed as to be * looked up * @@ -661,15 +661,15 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) { bool ap_isolation_enabled = false; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; /* if the AP isolation is requested on a VLAN, then check for its * setting in the proper VLAN private data structure */ - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (vlan) { ap_isolation_enabled = atomic_read(&vlan->ap_isolation); - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); } return ap_isolation_enabled; @@ -677,7 +677,7 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) /** * batadv_throw_uevent() - Send an uevent with batman-adv specific env data - * 
@bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: subsystem type of event. Stored in uevent's BATTYPE * @action: action type of event. Stored in uevent's BATACTION * @data: string with additional information to the event (ignored for @@ -692,7 +692,7 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, struct kobject *bat_kobj; char *uevent_env[4] = { NULL, NULL, NULL, NULL }; - bat_kobj = &bat_priv->soft_iface->dev.kobj; + bat_kobj = &bat_priv->mesh_iface->dev.kobj; uevent_env[0] = kasprintf(GFP_ATOMIC, "%s%s", BATADV_UEV_TYPE_VAR, diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 964f3088af5b..67af435ee04e 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2025.0" +#define BATADV_SOURCE_VERSION "2025.1" #endif /* B.A.T.M.A.N. 
parameters */ @@ -22,6 +22,8 @@ #define BATADV_THROUGHPUT_MAX_VALUE 0xFFFFFFFF #define BATADV_JITTER 20 +#define BATADV_MAX_MTU (ETH_MAX_MTU - batadv_max_header_len()) + /* Time To Live of broadcast messages */ #define BATADV_TTL 50 @@ -102,9 +104,7 @@ */ #define BATADV_TQ_SIMILARITY_THRESHOLD 50 -/* should not be bigger than 512 bytes or change the size of - * forw_packet->direct_link_flags - */ +#define BATADV_MAX_AGGREGATION_PACKETS 32 #define BATADV_MAX_AGGREGATION_BYTES 512 #define BATADV_MAX_AGGREGATION_MS 100 @@ -129,10 +129,10 @@ #define BATADV_TP_MAX_NUM 5 /** - * enum batadv_mesh_state - State of a soft interface + * enum batadv_mesh_state - State of a mesh interface */ enum batadv_mesh_state { - /** @BATADV_MESH_INACTIVE: soft interface is not yet running */ + /** @BATADV_MESH_INACTIVE: mesh interface is not yet running */ BATADV_MESH_INACTIVE, /** @BATADV_MESH_ACTIVE: interface is up and running */ @@ -238,8 +238,8 @@ extern unsigned int batadv_hardif_generation; extern unsigned char batadv_broadcast_addr[]; extern struct workqueue_struct *batadv_event_workqueue; -int batadv_mesh_init(struct net_device *soft_iface); -void batadv_mesh_free(struct net_device *soft_iface); +int batadv_mesh_init(struct net_device *mesh_iface); +void batadv_mesh_free(struct net_device *mesh_iface); bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr); int batadv_max_header_len(void); void batadv_skb_set_priority(struct sk_buff *skb, int offset); @@ -345,8 +345,8 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, #define batadv_seq_after(x, y) batadv_seq_before(y, x) /** - * batadv_add_counter() - Add to per cpu statistics counter of soft interface - * @bat_priv: the bat priv with all the soft interface information + * batadv_add_counter() - Add to per cpu statistics counter of mesh interface + * @bat_priv: the bat priv with all the mesh interface information * @idx: counter index which should be modified * @count: value to increase counter by 
* @@ -359,8 +359,8 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx, } /** - * batadv_inc_counter() - Increase per cpu statistics counter of soft interface - * @b: the bat priv with all the soft interface information + * batadv_inc_counter() - Increase per cpu statistics counter of mesh interface + * @b: the bat priv with all the mesh interface information * @i: counter index which should be modified */ #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/mesh-interface.c index 822d788a5f86..59e7b5aacbc9 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/mesh-interface.c @@ -4,7 +4,7 @@ * Marek Lindner, Simon Wunderlich */ -#include "soft-interface.h" +#include "mesh-interface.h" #include "main.h" #include <linux/atomic.h> @@ -91,7 +91,7 @@ static int batadv_interface_release(struct net_device *dev) /** * batadv_sum_counter() - Sum the cpu-local counters for index 'idx' - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @idx: index of counter to sum up * * Return: sum of all cpu-local counters @@ -125,7 +125,7 @@ static struct net_device_stats *batadv_interface_stats(struct net_device *dev) static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) { struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; struct sockaddr *addr = p; u8 old_addr[ETH_ALEN]; @@ -140,7 +140,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) return 0; rcu_read_lock(); - hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry_rcu(vlan, &bat_priv->meshif_vlan_list, list) { batadv_tt_local_remove(bat_priv, old_addr, vlan->vid, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, vlan->vid, @@ -170,7 +170,7 @@ static int 
batadv_interface_change_mtu(struct net_device *dev, int new_mtu) * @dev: registered network device to modify * * We do not actually need to set any rx filters for the virtual batman - * soft interface. However a dummy handler enables a user to set static + * mesh interface. However a dummy handler enables a user to set static * multicast listeners for instance. */ static void batadv_interface_set_rx_mode(struct net_device *dev) @@ -178,10 +178,10 @@ static void batadv_interface_set_rx_mode(struct net_device *dev) } static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, - struct net_device *soft_iface) + struct net_device *mesh_iface) { struct ethhdr *ethhdr; - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_hard_iface *primary_if = NULL; struct batadv_bcast_packet *bcast_packet; static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, @@ -209,7 +209,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, /* reset control block to avoid left overs from previous users */ memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); - netif_trans_update(soft_iface); + netif_trans_update(mesh_iface); vid = batadv_get_vid(skb, 0); skb_reset_mac_header(skb); @@ -246,7 +246,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, /* Register the client MAC in the transtable */ if (!is_multicast_ether_addr(ethhdr->h_source) && !batadv_bla_is_loopdetect_mac(ethhdr->h_source)) { - client_added = batadv_tt_local_add(soft_iface, ethhdr->h_source, + client_added = batadv_tt_local_add(mesh_iface, ethhdr->h_source, vid, skb->skb_iif, skb->mark); if (!client_added) @@ -397,12 +397,12 @@ end: /** * batadv_interface_rx() - receive ethernet frame on local batman-adv interface - * @soft_iface: local interface which will receive the ethernet frame - * @skb: ethernet frame for @soft_iface + * @mesh_iface: local interface which will receive the ethernet frame + * @skb: ethernet frame for 
@mesh_iface * @hdr_size: size of already parsed batman-adv header * @orig_node: originator from which the batman-adv packet was sent * - * Sends an ethernet frame to the receive path of the local @soft_iface. + * Sends an ethernet frame to the receive path of the local @mesh_iface. * skb->data has still point to the batman-adv header with the size @hdr_size. * The caller has to have parsed this header already and made sure that at least * @hdr_size bytes are still available for pull in @skb. @@ -412,12 +412,12 @@ end: * unicast packets will be dropped directly when it was sent between two * isolated clients. */ -void batadv_interface_rx(struct net_device *soft_iface, +void batadv_interface_rx(struct net_device *mesh_iface, struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node) { struct batadv_bcast_packet *batadv_bcast_packet; - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct vlan_ethhdr *vhdr; struct ethhdr *ethhdr; unsigned short vid; @@ -457,7 +457,7 @@ void batadv_interface_rx(struct net_device *soft_iface, } /* skb->dev & skb->pkt_type are set here */ - skb->protocol = eth_type_trans(skb, soft_iface); + skb->protocol = eth_type_trans(skb, mesh_iface); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); batadv_inc_counter(bat_priv, BATADV_CNT_RX); @@ -502,38 +502,38 @@ out: } /** - * batadv_softif_vlan_release() - release vlan from lists and queue for free + * batadv_meshif_vlan_release() - release vlan from lists and queue for free * after rcu grace period * @ref: kref pointer of the vlan object */ -void batadv_softif_vlan_release(struct kref *ref) +void batadv_meshif_vlan_release(struct kref *ref) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; - vlan = container_of(ref, struct batadv_softif_vlan, refcount); + vlan = container_of(ref, struct batadv_meshif_vlan, refcount); - spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); + 
spin_lock_bh(&vlan->bat_priv->meshif_vlan_list_lock); hlist_del_rcu(&vlan->list); - spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock); + spin_unlock_bh(&vlan->bat_priv->meshif_vlan_list_lock); kfree_rcu(vlan, rcu); } /** - * batadv_softif_vlan_get() - get the vlan object for a specific vid - * @bat_priv: the bat priv with all the soft interface information + * batadv_meshif_vlan_get() - get the vlan object for a specific vid + * @bat_priv: the bat priv with all the mesh interface information * @vid: the identifier of the vlan object to retrieve * * Return: the private data of the vlan matching the vid passed as argument or * NULL otherwise. The refcounter of the returned object is incremented by 1. */ -struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, +struct batadv_meshif_vlan *batadv_meshif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid) { - struct batadv_softif_vlan *vlan_tmp, *vlan = NULL; + struct batadv_meshif_vlan *vlan_tmp, *vlan = NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->meshif_vlan_list, list) { if (vlan_tmp->vid != vid) continue; @@ -549,28 +549,28 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, } /** - * batadv_softif_create_vlan() - allocate the needed resources for a new vlan - * @bat_priv: the bat priv with all the soft interface information + * batadv_meshif_create_vlan() - allocate the needed resources for a new vlan + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier * * Return: 0 on success, a negative error otherwise. 
*/ -int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) +int batadv_meshif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; - spin_lock_bh(&bat_priv->softif_vlan_list_lock); + spin_lock_bh(&bat_priv->meshif_vlan_list_lock); - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (vlan) { - batadv_softif_vlan_put(vlan); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + batadv_meshif_vlan_put(vlan); + spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return -EEXIST; } vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); if (!vlan) { - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return -ENOMEM; } @@ -581,37 +581,37 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) atomic_set(&vlan->ap_isolation, 0); kref_get(&vlan->refcount); - hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + hlist_add_head_rcu(&vlan->list, &bat_priv->meshif_vlan_list); + spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); /* add a new TT local entry. 
This one will be marked with the NOPURGE * flag */ - batadv_tt_local_add(bat_priv->soft_iface, - bat_priv->soft_iface->dev_addr, vid, + batadv_tt_local_add(bat_priv->mesh_iface, + bat_priv->mesh_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); - /* don't return reference to new softif_vlan */ - batadv_softif_vlan_put(vlan); + /* don't return reference to new meshif_vlan */ + batadv_meshif_vlan_put(vlan); return 0; } /** - * batadv_softif_destroy_vlan() - remove and destroy a softif_vlan object - * @bat_priv: the bat priv with all the soft interface information + * batadv_meshif_destroy_vlan() - remove and destroy a meshif_vlan object + * @bat_priv: the bat priv with all the mesh interface information * @vlan: the object to remove */ -static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan) +static void batadv_meshif_destroy_vlan(struct batadv_priv *bat_priv, + struct batadv_meshif_vlan *vlan) { /* explicitly remove the associated TT local entry because it is marked * with the NOPURGE flag */ - batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr, + batadv_tt_local_remove(bat_priv, bat_priv->mesh_iface->dev_addr, vlan->vid, "vlan interface destroyed", false); - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); } /** @@ -629,7 +629,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; /* only 802.1Q vlans are supported. * batman-adv does not know how to handle other types @@ -648,21 +648,21 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, vid |= BATADV_VLAN_HAS_TAG; /* if a new vlan is getting created and it already exists, it means that - * it was not deleted yet. batadv_softif_vlan_get() increases the + * it was not deleted yet. 
batadv_meshif_vlan_get() increases the * refcount in order to revive the object. * * if it does not exist then create it. */ - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) - return batadv_softif_create_vlan(bat_priv, vid); + return batadv_meshif_create_vlan(bat_priv, vid); /* add a new TT local entry. This one will be marked with the NOPURGE * flag. This must be added again, even if the vlan object already * exists, because the entry was deleted by kill_vid() */ - batadv_tt_local_add(bat_priv->soft_iface, - bat_priv->soft_iface->dev_addr, vid, + batadv_tt_local_add(bat_priv->mesh_iface, + bat_priv->mesh_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); return 0; @@ -684,7 +684,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; /* only 802.1Q vlans are supported. 
batman-adv does not know how to * handle other types @@ -693,19 +693,19 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, return -EINVAL; /* "priority tag" frames are handled like "untagged" frames - * and no softif_vlan needs to be destroyed + * and no meshif_vlan needs to be destroyed */ if (vid == 0) return 0; - vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); + vlan = batadv_meshif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); if (!vlan) return -ENOENT; - batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_meshif_destroy_vlan(bat_priv, vlan); /* finally free the vlan object */ - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); return 0; } @@ -741,12 +741,12 @@ static void batadv_set_lockdep_class(struct net_device *dev) } /** - * batadv_softif_init_late() - late stage initialization of soft interface + * batadv_meshif_init_late() - late stage initialization of mesh interface * @dev: registered network device to modify * * Return: error code on failures */ -static int batadv_softif_init_late(struct net_device *dev) +static int batadv_meshif_init_late(struct net_device *dev) { struct batadv_priv *bat_priv; u32 random_seqno; @@ -756,7 +756,7 @@ static int batadv_softif_init_late(struct net_device *dev) batadv_set_lockdep_class(dev); bat_priv = netdev_priv(dev); - bat_priv->soft_iface = dev; + bat_priv->mesh_iface = dev; /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called @@ -790,7 +790,7 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->log_level, 0); #endif atomic_set(&bat_priv->fragmentation, 1); - atomic_set(&bat_priv->packet_size_max, ETH_DATA_LEN); + atomic_set(&bat_priv->packet_size_max, BATADV_MAX_MTU); atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN); atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN); @@ -837,14 +837,14 @@ free_bat_counters: } /** - * batadv_softif_slave_add() - Add a 
slave interface to a batadv_soft_interface - * @dev: batadv_soft_interface used as master interface + * batadv_meshif_slave_add() - Add a slave interface to a batadv_mesh_interface + * @dev: batadv_mesh_interface used as master interface * @slave_dev: net_device which should become the slave interface * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ -static int batadv_softif_slave_add(struct net_device *dev, +static int batadv_meshif_slave_add(struct net_device *dev, struct net_device *slave_dev, struct netlink_ext_ack *extack) { @@ -852,7 +852,7 @@ static int batadv_softif_slave_add(struct net_device *dev, int ret = -EINVAL; hard_iface = batadv_hardif_get_by_netdev(slave_dev); - if (!hard_iface || hard_iface->soft_iface) + if (!hard_iface || hard_iface->mesh_iface) goto out; ret = batadv_hardif_enable_interface(hard_iface, dev); @@ -863,13 +863,13 @@ out: } /** - * batadv_softif_slave_del() - Delete a slave iface from a batadv_soft_interface - * @dev: batadv_soft_interface used as master interface + * batadv_meshif_slave_del() - Delete a slave iface from a batadv_mesh_interface + * @dev: batadv_mesh_interface used as master interface * @slave_dev: net_device which should be removed from the master interface * * Return: 0 if successful or error otherwise. 
*/ -static int batadv_softif_slave_del(struct net_device *dev, +static int batadv_meshif_slave_del(struct net_device *dev, struct net_device *slave_dev) { struct batadv_hard_iface *hard_iface; @@ -877,7 +877,7 @@ static int batadv_softif_slave_del(struct net_device *dev, hard_iface = batadv_hardif_get_by_netdev(slave_dev); - if (!hard_iface || hard_iface->soft_iface != dev) + if (!hard_iface || hard_iface->mesh_iface != dev) goto out; batadv_hardif_disable_interface(hard_iface); @@ -889,7 +889,7 @@ out: } static const struct net_device_ops batadv_netdev_ops = { - .ndo_init = batadv_softif_init_late, + .ndo_init = batadv_meshif_init_late, .ndo_open = batadv_interface_open, .ndo_stop = batadv_interface_release, .ndo_get_stats = batadv_interface_stats, @@ -900,8 +900,8 @@ static const struct net_device_ops batadv_netdev_ops = { .ndo_set_rx_mode = batadv_interface_set_rx_mode, .ndo_start_xmit = batadv_interface_tx, .ndo_validate_addr = eth_validate_addr, - .ndo_add_slave = batadv_softif_slave_add, - .ndo_del_slave = batadv_softif_slave_del, + .ndo_add_slave = batadv_meshif_slave_add, + .ndo_del_slave = batadv_meshif_slave_del, }; static void batadv_get_drvinfo(struct net_device *dev, @@ -1009,10 +1009,10 @@ static const struct ethtool_ops batadv_ethtool_ops = { }; /** - * batadv_softif_free() - Deconstructor of batadv_soft_interface + * batadv_meshif_free() - Deconstructor of batadv_mesh_interface * @dev: Device to cleanup and remove */ -static void batadv_softif_free(struct net_device *dev) +static void batadv_meshif_free(struct net_device *dev) { batadv_mesh_free(dev); @@ -1024,25 +1024,26 @@ static void batadv_softif_free(struct net_device *dev) } /** - * batadv_softif_init_early() - early stage initialization of soft interface + * batadv_meshif_init_early() - early stage initialization of mesh interface * @dev: registered network device to modify */ -static void batadv_softif_init_early(struct net_device *dev) +static void batadv_meshif_init_early(struct net_device 
*dev) { ether_setup(dev); dev->netdev_ops = &batadv_netdev_ops; dev->needs_free_netdev = true; - dev->priv_destructor = batadv_softif_free; + dev->priv_destructor = batadv_meshif_free; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; - dev->netns_local = true; + dev->netns_immutable = true; /* can't call min_mtu, because the needed variables * have not been initialized yet */ dev->mtu = ETH_DATA_LEN; + dev->max_mtu = BATADV_MAX_MTU; /* generate random address */ eth_hw_addr_random(dev); @@ -1051,14 +1052,14 @@ static void batadv_softif_init_early(struct net_device *dev) } /** - * batadv_softif_validate() - validate configuration of new batadv link + * batadv_meshif_validate() - validate configuration of new batadv link * @tb: IFLA_INFO_DATA netlink attributes * @data: enum batadv_ifla_attrs attributes * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ -static int batadv_softif_validate(struct nlattr *tb[], struct nlattr *data[], +static int batadv_meshif_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct batadv_algo_ops *algo_ops; @@ -1076,20 +1077,19 @@ static int batadv_softif_validate(struct nlattr *tb[], struct nlattr *data[], } /** - * batadv_softif_newlink() - pre-initialize and register new batadv link - * @src_net: the applicable net namespace + * batadv_meshif_newlink() - pre-initialize and register new batadv link * @dev: network device to register - * @tb: IFLA_INFO_DATA netlink attributes - * @data: enum batadv_ifla_attrs attributes + * @params: rtnl newlink parameters * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. 
*/ -static int batadv_softif_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int batadv_meshif_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct batadv_priv *bat_priv = netdev_priv(dev); + struct nlattr **data = params->data; const char *algo_name; int err; @@ -1104,40 +1104,40 @@ static int batadv_softif_newlink(struct net *src_net, struct net_device *dev, } /** - * batadv_softif_destroy_netlink() - deletion of batadv_soft_interface via + * batadv_meshif_destroy_netlink() - deletion of batadv_mesh_interface via * netlink - * @soft_iface: the to-be-removed batman-adv interface + * @mesh_iface: the to-be-removed batman-adv interface * @head: list pointer */ -static void batadv_softif_destroy_netlink(struct net_device *soft_iface, +static void batadv_meshif_destroy_netlink(struct net_device *mesh_iface, struct list_head *head) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_hard_iface *hard_iface; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface == soft_iface) + if (hard_iface->mesh_iface == mesh_iface) batadv_hardif_disable_interface(hard_iface); } /* destroy the "untagged" VLAN */ - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + vlan = batadv_meshif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (vlan) { - batadv_softif_destroy_vlan(bat_priv, vlan); - batadv_softif_vlan_put(vlan); + batadv_meshif_destroy_vlan(bat_priv, vlan); + batadv_meshif_vlan_put(vlan); } - unregister_netdevice_queue(soft_iface, head); + unregister_netdevice_queue(mesh_iface, head); } /** - * batadv_softif_is_valid() - Check whether device is a batadv soft interface + * batadv_meshif_is_valid() - Check whether device is a batadv mesh interface * @net_dev: device which should be 
checked * * Return: true when net_dev is a batman-adv interface, false otherwise */ -bool batadv_softif_is_valid(const struct net_device *net_dev) +bool batadv_meshif_is_valid(const struct net_device *net_dev) { if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) return true; @@ -1152,10 +1152,10 @@ static const struct nla_policy batadv_ifla_policy[IFLA_BATADV_MAX + 1] = { struct rtnl_link_ops batadv_link_ops __read_mostly = { .kind = "batadv", .priv_size = sizeof(struct batadv_priv), - .setup = batadv_softif_init_early, + .setup = batadv_meshif_init_early, .maxtype = IFLA_BATADV_MAX, .policy = batadv_ifla_policy, - .validate = batadv_softif_validate, - .newlink = batadv_softif_newlink, - .dellink = batadv_softif_destroy_netlink, + .validate = batadv_meshif_validate, + .newlink = batadv_meshif_newlink, + .dellink = batadv_meshif_destroy_netlink, }; diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/mesh-interface.h index 9f2003f1a497..7ba055b2bc26 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/mesh-interface.h @@ -4,8 +4,8 @@ * Marek Lindner */ -#ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_ -#define _NET_BATMAN_ADV_SOFT_INTERFACE_H_ +#ifndef _NET_BATMAN_ADV_MESH_INTERFACE_H_ +#define _NET_BATMAN_ADV_MESH_INTERFACE_H_ #include "main.h" @@ -16,27 +16,27 @@ #include <net/rtnetlink.h> int batadv_skb_head_push(struct sk_buff *skb, unsigned int len); -void batadv_interface_rx(struct net_device *soft_iface, +void batadv_interface_rx(struct net_device *mesh_iface, struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node); -bool batadv_softif_is_valid(const struct net_device *net_dev); +bool batadv_meshif_is_valid(const struct net_device *net_dev); extern struct rtnl_link_ops batadv_link_ops; -int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); -void batadv_softif_vlan_release(struct kref *ref); -struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, +int 
batadv_meshif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); +void batadv_meshif_vlan_release(struct kref *ref); +struct batadv_meshif_vlan *batadv_meshif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid); /** - * batadv_softif_vlan_put() - decrease the vlan object refcounter and + * batadv_meshif_vlan_put() - decrease the vlan object refcounter and * possibly release it * @vlan: the vlan object to release */ -static inline void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan) +static inline void batadv_meshif_vlan_put(struct batadv_meshif_vlan *vlan) { if (!vlan) return; - kref_put(&vlan->refcount, batadv_softif_vlan_release); + kref_put(&vlan->refcount, batadv_meshif_vlan_release); } -#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ +#endif /* _NET_BATMAN_ADV_MESH_INTERFACE_H_ */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d95c418484fa..5786680aff30 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -63,7 +63,7 @@ static void batadv_mcast_mla_update(struct work_struct *work); /** * batadv_mcast_start_timer() - schedule the multicast periodic worker - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) { @@ -72,18 +72,18 @@ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_mcast_get_bridge() - get the bridge on top of the softif if it exists - * @soft_iface: netdev struct of the mesh interface + * batadv_mcast_get_bridge() - get the bridge on top of the meshif if it exists + * @mesh_iface: netdev struct of the mesh interface * - * If the given soft interface has a bridge on top then the refcount + * If the given mesh interface has a bridge on top then the refcount * of the according net device is increased. * * Return: NULL if no such bridge exists. Otherwise the net device of the * bridge. 
*/ -static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) +static struct net_device *batadv_mcast_get_bridge(struct net_device *mesh_iface) { - struct net_device *upper = soft_iface; + struct net_device *upper = mesh_iface; rcu_read_lock(); do { @@ -97,7 +97,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) } /** - * batadv_mcast_mla_rtr_flags_softif_get_ipv4() - get mcast router flags from + * batadv_mcast_mla_rtr_flags_meshif_get_ipv4() - get mcast router flags from * node for IPv4 * @dev: the interface to check * @@ -107,7 +107,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) * * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR4 otherwise. */ -static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev) +static u8 batadv_mcast_mla_rtr_flags_meshif_get_ipv4(struct net_device *dev) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -118,7 +118,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev) } /** - * batadv_mcast_mla_rtr_flags_softif_get_ipv6() - get mcast router flags from + * batadv_mcast_mla_rtr_flags_meshif_get_ipv6() - get mcast router flags from * node for IPv6 * @dev: the interface to check * @@ -129,7 +129,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev) * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR6 otherwise. 
*/ #if IS_ENABLED(CONFIG_IPV6_MROUTE) -static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) +static u8 batadv_mcast_mla_rtr_flags_meshif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); @@ -140,16 +140,16 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) } #else static inline u8 -batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) +batadv_mcast_mla_rtr_flags_meshif_get_ipv6(struct net_device *dev) { return BATADV_MCAST_WANT_NO_RTR6; } #endif /** - * batadv_mcast_mla_rtr_flags_softif_get() - get mcast router flags from node - * @bat_priv: the bat priv with all the soft interface information - * @bridge: bridge interface on top of the soft_iface if present, + * batadv_mcast_mla_rtr_flags_meshif_get() - get mcast router flags from node + * @bat_priv: the bat priv with all the mesh interface information + * @bridge: bridge interface on top of the mesh_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers on this @@ -161,16 +161,16 @@ batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present * The former two OR'd: no multicast router is present */ -static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv, +static u8 batadv_mcast_mla_rtr_flags_meshif_get(struct batadv_priv *bat_priv, struct net_device *bridge) { - struct net_device *dev = bridge ? bridge : bat_priv->soft_iface; + struct net_device *dev = bridge ? 
bridge : bat_priv->mesh_iface; u8 flags = BATADV_NO_FLAGS; rcu_read_lock(); - flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv4(dev); - flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv6(dev); + flags |= batadv_mcast_mla_rtr_flags_meshif_get_ipv4(dev); + flags |= batadv_mcast_mla_rtr_flags_meshif_get_ipv6(dev); rcu_read_unlock(); @@ -179,8 +179,8 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv, /** * batadv_mcast_mla_rtr_flags_bridge_get() - get mcast router flags from bridge - * @bat_priv: the bat priv with all the soft interface information - * @bridge: bridge interface on top of the soft_iface if present, + * @bat_priv: the bat priv with all the mesh interface information + * @bridge: bridge interface on top of the mesh_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers behind a bridge. @@ -194,7 +194,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv, static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv, struct net_device *bridge) { - struct net_device *dev = bat_priv->soft_iface; + struct net_device *dev = bat_priv->mesh_iface; u8 flags = BATADV_NO_FLAGS; if (!bridge) @@ -210,8 +210,8 @@ static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv, /** * batadv_mcast_mla_rtr_flags_get() - get multicast router flags - * @bat_priv: the bat priv with all the soft interface information - * @bridge: bridge interface on top of the soft_iface if present, + * @bat_priv: the bat priv with all the mesh interface information + * @bridge: bridge interface on top of the mesh_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers on this @@ -228,7 +228,7 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv, { u8 flags = BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6; - flags &= batadv_mcast_mla_rtr_flags_softif_get(bat_priv, bridge); + flags &= 
batadv_mcast_mla_rtr_flags_meshif_get(bat_priv, bridge); flags &= batadv_mcast_mla_rtr_flags_bridge_get(bat_priv, bridge); return flags; @@ -236,7 +236,7 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv, /** * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Checks if all active hard interfaces have an MTU larger or equal to 1280 * bytes (IPv6 minimum MTU). @@ -252,7 +252,7 @@ static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) { @@ -267,7 +267,7 @@ static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) /** * batadv_mcast_mla_flags_get() - get the new multicast flags - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: A set of flags for the current/next TVLV, querier and * bridge state. 
@@ -275,7 +275,7 @@ static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) static struct batadv_mcast_mla_flags batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv) { - struct net_device *dev = bat_priv->soft_iface; + struct net_device *dev = bat_priv->mesh_iface; struct batadv_mcast_querier_state *qr4, *qr6; struct batadv_mcast_mla_flags mla_flags; struct net_device *bridge; @@ -351,13 +351,13 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, } /** - * batadv_mcast_mla_softif_get_ipv4() - get softif IPv4 multicast listeners + * batadv_mcast_mla_meshif_get_ipv4() - get meshif IPv4 multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of IPv4 multicast listeners residing - * on this kernel on the given soft interface, dev, in + * on this kernel on the given mesh interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * @@ -365,7 +365,7 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, * items added to the mcast_list otherwise. 
*/ static int -batadv_mcast_mla_softif_get_ipv4(struct net_device *dev, +batadv_mcast_mla_meshif_get_ipv4(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { @@ -417,13 +417,13 @@ batadv_mcast_mla_softif_get_ipv4(struct net_device *dev, } /** - * batadv_mcast_mla_softif_get_ipv6() - get softif IPv6 multicast listeners + * batadv_mcast_mla_meshif_get_ipv6() - get meshif IPv6 multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of IPv6 multicast listeners residing - * on this kernel on the given soft interface, dev, in + * on this kernel on the given mesh interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * @@ -432,7 +432,7 @@ batadv_mcast_mla_softif_get_ipv4(struct net_device *dev, */ #if IS_ENABLED(CONFIG_IPV6) static int -batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, +batadv_mcast_mla_meshif_get_ipv6(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { @@ -490,7 +490,7 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, } #else static inline int -batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, +batadv_mcast_mla_meshif_get_ipv6(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { @@ -499,13 +499,13 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, #endif /** - * batadv_mcast_mla_softif_get() - get softif multicast listeners + * batadv_mcast_mla_meshif_get() - get meshif multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of multicast listeners residing - * on this kernel on the given soft 
interface, dev, in + * on this kernel on the given mesh interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * @@ -518,7 +518,7 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, * items added to the mcast_list otherwise. */ static int -batadv_mcast_mla_softif_get(struct net_device *dev, +batadv_mcast_mla_meshif_get(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { @@ -528,11 +528,11 @@ batadv_mcast_mla_softif_get(struct net_device *dev, if (bridge) dev = bridge; - ret4 = batadv_mcast_mla_softif_get_ipv4(dev, mcast_list, flags); + ret4 = batadv_mcast_mla_meshif_get_ipv4(dev, mcast_list, flags); if (ret4 < 0) goto out; - ret6 = batadv_mcast_mla_softif_get_ipv6(dev, mcast_list, flags); + ret6 = batadv_mcast_mla_meshif_get_ipv6(dev, mcast_list, flags); if (ret6 < 0) { ret4 = 0; goto out; @@ -576,7 +576,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src) * * Collects multicast addresses of multicast listeners residing * on foreign, non-mesh devices which we gave access to our mesh via - * a bridge on top of the given soft interface, dev, in the given + * a bridge on top of the given mesh interface, dev, in the given * mcast_list. 
* * Return: -ENOMEM on memory allocation error or the number of @@ -672,7 +672,7 @@ static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) /** * batadv_mcast_mla_tt_retract() - clean up multicast listener announcements - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mcast_list: a list of addresses which should _not_ be removed * * Retracts the announcement of any multicast listener from the @@ -704,7 +704,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, /** * batadv_mcast_mla_tt_add() - add multicast listener announcements - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @mcast_list: a list of addresses which are going to get added * * Adds multicast listener announcements from the given mcast_list to the @@ -724,7 +724,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, &bat_priv->mcast.mla_list)) continue; - if (!batadv_tt_local_add(bat_priv->soft_iface, + if (!batadv_tt_local_add(bat_priv->mesh_iface, mcast_entry->addr, BATADV_NO_FLAGS, BATADV_NULL_IFINDEX, BATADV_NO_MARK)) continue; @@ -737,7 +737,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, /** * batadv_mcast_querier_log() - debug output regarding the querier status on * link - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD") * @old_state: the previous querier state on our link * @new_state: the new querier state on our link @@ -754,7 +754,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, * potentially shadowing listeners from us then. * * This is only interesting for nodes with a bridge on top of their - * soft interface. + * mesh interface. 
*/ static void batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, @@ -762,14 +762,14 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, struct batadv_mcast_querier_state *new_state) { if (!old_state->exists && new_state->exists) - batadv_info(bat_priv->soft_iface, "%s Querier appeared\n", + batadv_info(bat_priv->mesh_iface, "%s Querier appeared\n", str_proto); else if (old_state->exists && !new_state->exists) - batadv_info(bat_priv->soft_iface, + batadv_info(bat_priv->mesh_iface, "%s Querier disappeared - multicast optimizations disabled\n", str_proto); else if (!bat_priv->mcast.mla_flags.bridged && !new_state->exists) - batadv_info(bat_priv->soft_iface, + batadv_info(bat_priv->mesh_iface, "No %s Querier present - multicast optimizations disabled\n", str_proto); @@ -789,7 +789,7 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, /** * batadv_mcast_bridge_log() - debug output for topology changes in bridged * setups - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @new_flags: flags indicating the new multicast state * * If no bridges are ever used on this node, then this function does nothing. @@ -798,7 +798,7 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, * which might be relevant to our multicast optimizations. * * More precisely, it outputs information when a bridge interface is added or - * removed from a soft interface. And when a bridge is present, it further + * removed from a mesh interface. And when a bridge is present, it further * outputs information about the querier state which is relevant for the * multicast flags this node is going to set. 
*/ @@ -827,7 +827,7 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv, /** * batadv_mcast_flags_log() - output debug information about mcast flag changes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @flags: TVLV flags indicating the new multicast state * * Whenever the multicast TVLV flags this node announces change, this function @@ -860,7 +860,7 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) /** * batadv_mcast_mla_flags_update() - update multicast flags - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @flags: flags indicating the new multicast state * * Updates the own multicast tvlv with our current multicast related settings, @@ -889,7 +889,7 @@ batadv_mcast_mla_flags_update(struct batadv_priv *bat_priv, /** * __batadv_mcast_mla_update() - update the own MLAs - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Updates the own multicast listener announcements in the translation * table as well as the own, announced multicast tvlv container. 
@@ -901,18 +901,18 @@ batadv_mcast_mla_flags_update(struct batadv_priv *bat_priv, */ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv) { - struct net_device *soft_iface = bat_priv->soft_iface; + struct net_device *mesh_iface = bat_priv->mesh_iface; struct hlist_head mcast_list = HLIST_HEAD_INIT; struct batadv_mcast_mla_flags flags; int ret; flags = batadv_mcast_mla_flags_get(bat_priv); - ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list, &flags); + ret = batadv_mcast_mla_meshif_get(mesh_iface, &mcast_list, &flags); if (ret < 0) goto out; - ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list, &flags); + ret = batadv_mcast_mla_bridge_get(mesh_iface, &mcast_list, &flags); if (ret < 0) goto out; @@ -977,7 +977,7 @@ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb) /** * batadv_mcast_forw_mode_check_ipv4() - check for optimized forwarding * potential - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the IPv4 packet to check * @is_unsnoopable: stores whether the destination is snoopable * @is_routable: stores whether the destination is routable @@ -1042,7 +1042,7 @@ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb) /** * batadv_mcast_forw_mode_check_ipv6() - check for optimized forwarding * potential - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the IPv6 packet to check * @is_unsnoopable: stores whether the destination is snoopable * @is_routable: stores whether the destination is routable @@ -1084,7 +1084,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_mode_check() - check for optimized forwarding potential - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast frame to 
check * @is_unsnoopable: stores whether the destination is snoopable * @is_routable: stores whether the destination is routable @@ -1124,7 +1124,7 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all_ip_count() - count nodes with unspecific mcast * interest - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ethhdr: ethernet header of a packet * * Return: the number of nodes which want all IPv4 multicast traffic if the @@ -1147,7 +1147,7 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_rtr_count() - count nodes with a multicast router - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @protocol: the ethernet protocol type to count multicast routers for * * Return: the number of nodes which want all routable IPv4 multicast traffic @@ -1170,7 +1170,7 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_mode_by_count() - get forwarding mode by count - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to check * @vid: the vlan identifier * @is_routable: stores whether the destination is routable @@ -1214,7 +1214,7 @@ batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_mode() - check on how to forward a multicast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to check * @vid: the vlan identifier * @is_routable: stores whether the destination is routable @@ -1259,7 +1259,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_mcast_forw_send_orig() - 
send a multicast packet to an originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to send * @vid: the vlan identifier * @orig_node: the originator to send the packet to @@ -1288,7 +1288,7 @@ static int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_tt() - forwards a packet to multicast listeners - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1336,7 +1336,7 @@ out: /** * batadv_mcast_forw_want_all_ipv4() - forward to nodes with want-all-ipv4 - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1373,7 +1373,7 @@ batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all_ipv6() - forward to nodes with want-all-ipv6 - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: The multicast packet to transmit * @vid: the vlan identifier * @@ -1410,7 +1410,7 @@ batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all() - forward packet to nodes in a want-all list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1439,7 +1439,7 @@ batadv_mcast_forw_want_all(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all_rtr4() - forward to nodes with want-all-rtr4 - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface 
information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1476,7 +1476,7 @@ batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_all_rtr6() - forward to nodes with want-all-rtr6 - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: The multicast packet to transmit * @vid: the vlan identifier * @@ -1513,7 +1513,7 @@ batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_want_rtr() - forward packet to nodes in a want-all-rtr list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @@ -1542,7 +1542,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_send() - send packet to any detected multicast recipient - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @is_routable: stores whether the destination is routable @@ -1590,7 +1590,7 @@ skip_mc_router: /** * batadv_mcast_want_unsnoop_update() - update unsnoop counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1636,7 +1636,7 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, /** * batadv_mcast_want_ipv4_update() - update want-all-ipv4 counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have 
changed of * @mcast_flags: flags indicating the new multicast state * @@ -1681,7 +1681,7 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, /** * batadv_mcast_want_ipv6_update() - update want-all-ipv6 counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1726,7 +1726,7 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, /** * batadv_mcast_want_rtr4_update() - update want-all-rtr4 counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1771,7 +1771,7 @@ static void batadv_mcast_want_rtr4_update(struct batadv_priv *bat_priv, /** * batadv_mcast_want_rtr6_update() - update want-all-rtr6 counter and list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1816,7 +1816,7 @@ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv, /** * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * @@ -1872,7 +1872,7 @@ batadv_mcast_tvlv_flags_get(bool enabled, void *tvlv_value, u16 tvlv_value_len) /** * batadv_mcast_tvlv_ogm_handler() - process incoming multicast tvlv container - * 
@bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the multicast data @@ -1915,7 +1915,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, /** * batadv_mcast_init() - initialize the multicast optimizations structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_mcast_init(struct batadv_priv *bat_priv) { @@ -1934,7 +1934,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) /** * batadv_mcast_mesh_info_put() - put multicast info into a netlink message * @msg: buffer for the message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 or error code. 
*/ @@ -2060,7 +2060,7 @@ skip: * @msg: buffer for the message * @portid: netlink port * @cb: Control block containing additional options - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @bucket: current bucket to dump * @idx: index in current bucket to the next entry to dump * @@ -2103,15 +2103,15 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb, struct batadv_hard_iface **primary_if) { struct batadv_hard_iface *hard_iface = NULL; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; int ret = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); hard_iface = batadv_primary_if_get_selected(bat_priv); if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) { @@ -2120,7 +2120,7 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb, } out: - dev_put(soft_iface); + dev_put(mesh_iface); if (!ret && primary_if) *primary_if = hard_iface; @@ -2150,7 +2150,7 @@ int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb) if (ret) return ret; - bat_priv = netdev_priv(primary_if->soft_iface); + bat_priv = netdev_priv(primary_if->mesh_iface); ret = __batadv_mcast_flags_dump(msg, portid, cb, bat_priv, bucket, idx); batadv_hardif_put(primary_if); @@ -2159,7 +2159,7 @@ int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb) /** * batadv_mcast_free() - free the multicast optimizations structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_mcast_free(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/multicast_forw.c 
b/net/batman-adv/multicast_forw.c index fafd6ba8c056..b8668a80b94a 100644 --- a/net/batman-adv/multicast_forw.c +++ b/net/batman-adv/multicast_forw.c @@ -131,7 +131,7 @@ batadv_mcast_forw_orig_entry(struct hlist_node *node, /** * batadv_mcast_forw_push_dest() - push an originator MAC address onto an skb - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination address onto * @vid: the vlan identifier * @orig_node: the originator node to get the MAC address from @@ -174,7 +174,7 @@ static bool batadv_mcast_forw_push_dest(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_push_dests_list() - push originators from list onto an skb - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @head: the list to gather originators from @@ -215,7 +215,7 @@ static int batadv_mcast_forw_push_dests_list(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_push_tt() - push originators with interest through TT - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @num_dests: a pointer to store the number of pushed addresses in @@ -262,7 +262,7 @@ out: /** * batadv_mcast_forw_push_want_all() - push originators with want-all flag - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @num_dests: a pointer to store the number of pushed addresses in @@ -308,7 +308,7 @@ static bool batadv_mcast_forw_push_want_all(struct batadv_priv *bat_priv, /** * 
batadv_mcast_forw_push_want_rtr() - push originators with want-router flag - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @num_dests: a pointer to store the number of pushed addresses in @@ -475,7 +475,7 @@ out: /** * batadv_mcast_forw_push_dests() - push originator addresses onto an skb - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the destination addresses onto * @vid: the vlan identifier * @is_routable: indicates whether the destination is routable @@ -567,7 +567,7 @@ static int batadv_mcast_forw_push_tracker(struct sk_buff *skb, int num_dests, /** * batadv_mcast_forw_push_tvlvs() - push a multicast tracker TVLV onto an skb - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb to push the tracker TVLV onto * @vid: the vlan identifier * @is_routable: indicates whether the destination is routable @@ -634,7 +634,7 @@ batadv_mcast_forw_push_hdr(struct sk_buff *skb, unsigned short tvlv_len) /** * batadv_mcast_forw_scrub_dests() - scrub destinations in a tracker TVLV - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @comp_neigh: next hop neighbor to scrub+collect destinations for * @dest: start MAC entry in original skb's tracker TVLV * @next_dest: start MAC entry in to be sent skb's tracker TVLV @@ -905,7 +905,7 @@ static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb) /** * batadv_mcast_forw_packet() - forward a batman-adv multicast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the 
received or locally generated batman-adv multicast packet * @local_xmit: indicates that the packet was locally generated and not received * @@ -920,7 +920,7 @@ static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb) * * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error * code on failure. NET_RX_SUCCESS if the received packet is supposed to be - * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. + * decapsulated and forwarded to the own mesh interface, NET_RX_DROP otherwise. */ static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, bool local_xmit) @@ -1028,7 +1028,7 @@ static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_tracker_tvlv_handler() - handle an mcast tracker tvlv - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the received batman-adv multicast packet * * Parses the tracker TVLV of an incoming batman-adv multicast packet and @@ -1042,7 +1042,7 @@ static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv, * * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error * code on failure. NET_RX_SUCCESS if the received packet is supposed to be - * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. + * decapsulated and forwarded to the own mesh interface, NET_RX_DROP otherwise. 
*/ int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -1075,7 +1075,7 @@ unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests) /** * batadv_mcast_forw_expand_head() - expand headroom for an mcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to send * * Tries to expand an skb's headroom so that its head to tail is 1298 @@ -1110,7 +1110,7 @@ static int batadv_mcast_forw_expand_head(struct batadv_priv *bat_priv, /** * batadv_mcast_forw_push() - encapsulate skb in a batman-adv multicast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to encapsulate and send * @vid: the vlan identifier * @is_routable: indicates whether the destination is routable @@ -1154,7 +1154,7 @@ err: /** * batadv_mcast_forw_mcsend() - send a self prepared batman-adv multicast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the multicast packet to encapsulate and send * * Transmits a batman-adv multicast packet that was locally prepared and diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index eefba5600ded..e7c8f9f2bb1f 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -43,10 +43,10 @@ #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "mesh-interface.h" #include "multicast.h" #include "network-coding.h" #include "originator.h" -#include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" @@ -63,7 +63,7 @@ enum batadv_netlink_multicast_groups { */ enum batadv_genl_ops_flags { /** - * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in + * @BATADV_FLAG_NEED_MESH: request requires valid mesh 
interface in * attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be * saved in info->user_ptr[0] */ @@ -166,24 +166,24 @@ static int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) } /** - * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute + * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation meshif attribute * @msg: Netlink message to dump into - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of failure */ static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg, struct batadv_priv *bat_priv) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; u8 ap_isolation; - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + vlan = batadv_meshif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (!vlan) return 0; ap_isolation = atomic_read(&vlan->ap_isolation); - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, !!ap_isolation); @@ -192,21 +192,21 @@ static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg, /** * batadv_netlink_set_mesh_ap_isolation() - Set ap_isolation from genl msg * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of failure */ static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr, struct batadv_priv *bat_priv) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + vlan = batadv_meshif_vlan_get(bat_priv, BATADV_NO_FLAGS); if (!vlan) return -ENOENT; atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); - batadv_softif_vlan_put(vlan); + 
batadv_meshif_vlan_put(vlan); return 0; } @@ -214,7 +214,7 @@ static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr, /** * batadv_netlink_mesh_fill() - Fill message with mesh attributes * @msg: Netlink message to dump into - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @cmd: type of message to generate * @portid: Port making netlink request * @seq: sequence number for message @@ -227,7 +227,7 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, enum batadv_nl_commands cmd, u32 portid, u32 seq, int flags) { - struct net_device *soft_iface = bat_priv->soft_iface; + struct net_device *mesh_iface = bat_priv->mesh_iface; struct batadv_hard_iface *primary_if = NULL; struct net_device *hard_iface; void *hdr; @@ -239,10 +239,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) || nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_priv->algo_ops->name) || - nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) || - nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) || + nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, mesh_iface->ifindex) || + nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, mesh_iface->name) || nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN, - soft_iface->dev_addr) || + mesh_iface->dev_addr) || nla_put_u8(msg, BATADV_ATTR_TT_TTVN, (u8)atomic_read(&bat_priv->tt.vn))) goto nla_put_failure; @@ -369,8 +369,8 @@ nla_put_failure: } /** - * batadv_netlink_notify_mesh() - send softif attributes to listener - * @bat_priv: the bat priv with all the soft interface information + * batadv_netlink_notify_mesh() - send meshif attributes to listener + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success, < 0 on error */ @@ -391,14 +391,14 @@ static int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv) } 
genlmsg_multicast_netns(&batadv_netlink_family, - dev_net(bat_priv->soft_iface), msg, 0, + dev_net(bat_priv->mesh_iface), msg, 0, BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); return 0; } /** - * batadv_netlink_get_mesh() - Get softif attributes + * batadv_netlink_get_mesh() - Get meshif attributes * @skb: Netlink message with request data * @info: receiver information * @@ -427,7 +427,7 @@ static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info) } /** - * batadv_netlink_set_mesh() - Set softif attributes + * batadv_netlink_set_mesh() - Set meshif attributes * @skb: Netlink message with request data * @info: receiver information * @@ -474,7 +474,7 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->bridge_loop_avoidance, !!nla_get_u8(attr)); - batadv_bla_status_update(bat_priv->soft_iface); + batadv_bla_status_update(bat_priv->mesh_iface); } #endif /* CONFIG_BATMAN_ADV_BLA */ @@ -484,7 +484,7 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->distributed_arp_table, !!nla_get_u8(attr)); - batadv_dat_status_update(bat_priv->soft_iface); + batadv_dat_status_update(bat_priv->mesh_iface); } #endif /* CONFIG_BATMAN_ADV_DAT */ @@ -494,7 +494,7 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr)); rtnl_lock(); - batadv_update_min_mtu(bat_priv->soft_iface); + batadv_update_min_mtu(bat_priv->mesh_iface); rtnl_unlock(); } @@ -594,7 +594,7 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]; atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr)); - batadv_nc_status_update(bat_priv->soft_iface); + batadv_nc_status_update(bat_priv->mesh_iface); } #endif /* CONFIG_BATMAN_ADV_NC */ @@ -633,7 +633,7 @@ batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie) /** * 
batadv_netlink_tpmeter_notify() - send tp_meter result via netlink to client - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: destination of tp_meter session * @result: reason for tp meter session stop * @test_time: total time of the tp_meter session @@ -680,7 +680,7 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, genlmsg_end(msg, hdr); genlmsg_multicast_netns(&batadv_netlink_family, - dev_net(bat_priv->soft_iface), msg, 0, + dev_net(bat_priv->mesh_iface), msg, 0, BATADV_NL_MCGRP_TPMETER, GFP_KERNEL); return 0; @@ -778,7 +778,7 @@ batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info) /** * batadv_netlink_hardif_fill() - Fill message with hardif attributes * @msg: Netlink message to dump into - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hard_iface: hard interface which was modified * @cmd: type of message to generate * @portid: Port making netlink request @@ -806,11 +806,11 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg, genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, - bat_priv->soft_iface->ifindex)) + bat_priv->mesh_iface->ifindex)) goto nla_put_failure; if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, - bat_priv->soft_iface->name)) + bat_priv->mesh_iface->name)) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, @@ -850,7 +850,7 @@ nla_put_failure: /** * batadv_netlink_notify_hardif() - send hardif attributes to listener - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hard_iface: hard interface which was modified * * Return: 0 on success, < 0 on error @@ -873,7 +873,7 @@ static int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, } 
genlmsg_multicast_netns(&batadv_netlink_family, - dev_net(bat_priv->soft_iface), msg, 0, + dev_net(bat_priv->mesh_iface), msg, 0, BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); return 0; @@ -963,24 +963,24 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb, static int batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) { - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_hard_iface *hard_iface; struct batadv_priv *bat_priv; int portid = NETLINK_CB(cb->skb).portid; int skip = cb->args[0]; int i = 0; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); rtnl_lock(); cb->seq = batadv_hardif_generation << 1 | 1; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) + if (hard_iface->mesh_iface != mesh_iface) continue; if (i++ < skip) @@ -997,7 +997,7 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) rtnl_unlock(); - dev_put(soft_iface); + dev_put(mesh_iface); cb->args[0] = i; @@ -1007,7 +1007,7 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) /** * batadv_netlink_vlan_fill() - Fill message with vlan attributes * @msg: Netlink message to dump into - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vlan: vlan which was modified * @cmd: type of message to generate * @portid: Port making netlink request @@ -1018,7 +1018,7 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) */ static int batadv_netlink_vlan_fill(struct sk_buff *msg, struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan, + struct batadv_meshif_vlan *vlan, enum batadv_nl_commands cmd, u32 portid, u32 
seq, int flags) { @@ -1029,11 +1029,11 @@ static int batadv_netlink_vlan_fill(struct sk_buff *msg, return -ENOBUFS; if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, - bat_priv->soft_iface->ifindex)) + bat_priv->mesh_iface->ifindex)) goto nla_put_failure; if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, - bat_priv->soft_iface->name)) + bat_priv->mesh_iface->name)) goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK)) @@ -1053,13 +1053,13 @@ nla_put_failure: /** * batadv_netlink_notify_vlan() - send vlan attributes to listener - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vlan: vlan which was modified * * Return: 0 on success, < 0 on error */ static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan) + struct batadv_meshif_vlan *vlan) { struct sk_buff *msg; int ret; @@ -1076,7 +1076,7 @@ static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, } genlmsg_multicast_netns(&batadv_netlink_family, - dev_net(bat_priv->soft_iface), msg, 0, + dev_net(bat_priv->mesh_iface), msg, 0, BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); return 0; @@ -1091,7 +1091,7 @@ static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, */ static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info) { - struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_meshif_vlan *vlan = info->user_ptr[1]; struct batadv_priv *bat_priv = info->user_ptr[0]; struct sk_buff *msg; int ret; @@ -1121,7 +1121,7 @@ static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info) */ static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info) { - struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_meshif_vlan *vlan = info->user_ptr[1]; struct batadv_priv *bat_priv = info->user_ptr[0]; struct nlattr *attr; @@ -1137,43 +1137,43 @@ static int 
batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info) } /** - * batadv_netlink_get_softif_from_ifindex() - Get soft-iface from ifindex + * batadv_netlink_get_meshif_from_ifindex() - Get mesh-iface from ifindex * @net: the applicable net namespace - * @ifindex: index of the soft interface + * @ifindex: index of the mesh interface * - * Return: Pointer to soft interface (with increased refcnt) on success, error + * Return: Pointer to mesh interface (with increased refcnt) on success, error * pointer on error */ static struct net_device * -batadv_netlink_get_softif_from_ifindex(struct net *net, int ifindex) +batadv_netlink_get_meshif_from_ifindex(struct net *net, int ifindex) { - struct net_device *soft_iface; + struct net_device *mesh_iface; - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface) + mesh_iface = dev_get_by_index(net, ifindex); + if (!mesh_iface) return ERR_PTR(-ENODEV); - if (!batadv_softif_is_valid(soft_iface)) - goto err_put_softif; + if (!batadv_meshif_is_valid(mesh_iface)) + goto err_put_meshif; - return soft_iface; + return mesh_iface; -err_put_softif: - dev_put(soft_iface); +err_put_meshif: + dev_put(mesh_iface); return ERR_PTR(-EINVAL); } /** - * batadv_netlink_get_softif_from_info() - Get soft-iface from genl attributes + * batadv_netlink_get_meshif_from_info() - Get mesh-iface from genl attributes * @net: the applicable net namespace * @info: receiver information * - * Return: Pointer to soft interface (with increased refcnt) on success, error + * Return: Pointer to mesh interface (with increased refcnt) on success, error * pointer on error */ static struct net_device * -batadv_netlink_get_softif_from_info(struct net *net, struct genl_info *info) +batadv_netlink_get_meshif_from_info(struct net *net, struct genl_info *info) { int ifindex; @@ -1182,30 +1182,30 @@ batadv_netlink_get_softif_from_info(struct net *net, struct genl_info *info) ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - return 
batadv_netlink_get_softif_from_ifindex(net, ifindex); + return batadv_netlink_get_meshif_from_ifindex(net, ifindex); } /** - * batadv_netlink_get_softif() - Retrieve soft interface from netlink callback + * batadv_netlink_get_meshif() - Retrieve mesh interface from netlink callback * @cb: callback structure containing arguments * - * Return: Pointer to soft interface (with increased refcnt) on success, error + * Return: Pointer to mesh interface (with increased refcnt) on success, error * pointer on error */ -struct net_device *batadv_netlink_get_softif(struct netlink_callback *cb) +struct net_device *batadv_netlink_get_meshif(struct netlink_callback *cb) { int ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); if (!ifindex) return ERR_PTR(-ENONET); - return batadv_netlink_get_softif_from_ifindex(sock_net(cb->skb->sk), + return batadv_netlink_get_meshif_from_ifindex(sock_net(cb->skb->sk), ifindex); } /** * batadv_netlink_get_hardif_from_ifindex() - Get hard-iface from ifindex - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @net: the applicable net namespace * @ifindex: index of the hard interface * @@ -1227,7 +1227,7 @@ batadv_netlink_get_hardif_from_ifindex(struct batadv_priv *bat_priv, if (!hard_iface) goto err_put_harddev; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) goto err_put_hardif; /* hard_dev is referenced by hard_iface and not needed here */ @@ -1245,7 +1245,7 @@ err_put_harddev: /** * batadv_netlink_get_hardif_from_info() - Get hard-iface from genl attributes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @net: the applicable net namespace * @info: receiver information * @@ -1268,7 +1268,7 @@ batadv_netlink_get_hardif_from_info(struct batadv_priv *bat_priv, /** * batadv_netlink_get_hardif() - 
Retrieve hard interface from netlink callback - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @cb: callback structure containing arguments * * Return: Pointer to hard interface (with increased refcnt) on success, error @@ -1290,18 +1290,18 @@ batadv_netlink_get_hardif(struct batadv_priv *bat_priv, /** * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @net: the applicable net namespace * @info: receiver information * * Return: Pointer to vlan on success (with increased refcnt), error pointer * on error */ -static struct batadv_softif_vlan * +static struct batadv_meshif_vlan * batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net, struct genl_info *info) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; u16 vid; if (!info->attrs[BATADV_ATTR_VLANID]) @@ -1309,7 +1309,7 @@ batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net, vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]); - vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); + vlan = batadv_meshif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); if (!vlan) return ERR_PTR(-ENOENT); @@ -1331,8 +1331,8 @@ static int batadv_pre_doit(const struct genl_split_ops *ops, struct net *net = genl_info_net(info); struct batadv_hard_iface *hard_iface; struct batadv_priv *bat_priv = NULL; - struct batadv_softif_vlan *vlan; - struct net_device *soft_iface; + struct batadv_meshif_vlan *vlan; + struct net_device *mesh_iface; u8 user_ptr1_flags; u8 mesh_dep_flags; int ret; @@ -1347,11 +1347,11 @@ static int batadv_pre_doit(const struct genl_split_ops *ops, return -EINVAL; if (ops->internal_flags & BATADV_FLAG_NEED_MESH) { - soft_iface = batadv_netlink_get_softif_from_info(net, info); - if (IS_ERR(soft_iface)) - return 
PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif_from_info(net, info); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); info->user_ptr[0] = bat_priv; } @@ -1360,7 +1360,7 @@ static int batadv_pre_doit(const struct genl_split_ops *ops, info); if (IS_ERR(hard_iface)) { ret = PTR_ERR(hard_iface); - goto err_put_softif; + goto err_put_meshif; } info->user_ptr[1] = hard_iface; @@ -1370,7 +1370,7 @@ static int batadv_pre_doit(const struct genl_split_ops *ops, vlan = batadv_get_vlan_from_info(bat_priv, net, info); if (IS_ERR(vlan)) { ret = PTR_ERR(vlan); - goto err_put_softif; + goto err_put_meshif; } info->user_ptr[1] = vlan; @@ -1378,9 +1378,9 @@ static int batadv_pre_doit(const struct genl_split_ops *ops, return 0; -err_put_softif: +err_put_meshif: if (bat_priv) - dev_put(bat_priv->soft_iface); + dev_put(bat_priv->mesh_iface); return ret; } @@ -1396,7 +1396,7 @@ static void batadv_post_doit(const struct genl_split_ops *ops, struct genl_info *info) { struct batadv_hard_iface *hard_iface; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; struct batadv_priv *bat_priv; if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF && @@ -1408,12 +1408,12 @@ static void batadv_post_doit(const struct genl_split_ops *ops, if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) { vlan = info->user_ptr[1]; - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); } if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) { bat_priv = info->user_ptr[0]; - dev_put(bat_priv->soft_iface); + dev_put(bat_priv->mesh_iface); } } @@ -1567,7 +1567,7 @@ void __init batadv_netlink_register(void) ret = genl_register_family(&batadv_netlink_family); if (ret) - pr_warn("unable to register netlink family"); + pr_warn("unable to register netlink family\n"); } /** diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 2097c2ae98f1..fe4548b974bb 
100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -15,7 +15,7 @@ void batadv_netlink_register(void); void batadv_netlink_unregister(void); -struct net_device *batadv_netlink_get_softif(struct netlink_callback *cb); +struct net_device *batadv_netlink_get_meshif(struct netlink_callback *cb); struct batadv_hard_iface * batadv_netlink_get_hardif(struct batadv_priv *bat_priv, struct netlink_callback *cb); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 71ebd0284f95..9f56308779cc 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -65,7 +65,7 @@ int __init batadv_nc_init(void) /** * batadv_nc_start_timer() - initialise the nc periodic worker - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) { @@ -76,7 +76,7 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) /** * batadv_nc_tvlv_container_update() - update the network coding tvlv container * after network coding setting change - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv) { @@ -98,7 +98,7 @@ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv) /** * batadv_nc_status_update() - update the network coding tvlv container after * network coding setting change - * @net_dev: the soft interface net device + * @net_dev: the mesh interface net device */ void batadv_nc_status_update(struct net_device *net_dev) { @@ -109,7 +109,7 @@ void batadv_nc_status_update(struct net_device *net_dev) /** * batadv_nc_tvlv_ogm_handler_v1() - process incoming nc tvlv container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface 
information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data @@ -128,7 +128,7 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /** * batadv_nc_mesh_init() - initialise coding hash table and start housekeeping - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of failure */ @@ -171,7 +171,7 @@ err: /** * batadv_nc_init_bat_priv() - initialise the nc specific bat_priv variables - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) { @@ -267,7 +267,7 @@ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet, /** * batadv_nc_to_purge_nc_node() - checks whether an nc node has to be purged - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @nc_node: the nc node to check * * Return: true if the entry has to be purged now, false otherwise @@ -283,7 +283,7 @@ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, /** * batadv_nc_to_purge_nc_path_coding() - checks whether an nc path has timed out - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @nc_path: the nc path to check * * Return: true if the entry has to be purged now, false otherwise @@ -304,7 +304,7 @@ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, /** * batadv_nc_to_purge_nc_path_decoding() - checks whether an nc path has timed * out - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information 
* @nc_path: the nc path to check * * Return: true if the entry has to be purged now, false otherwise @@ -325,7 +325,7 @@ static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, /** * batadv_nc_purge_orig_nc_nodes() - go through list of nc nodes and purge stale * entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @list: list of nc nodes * @lock: nc node list lock * @to_purge: function in charge to decide whether an entry has to be purged or @@ -363,7 +363,7 @@ batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv, /** * batadv_nc_purge_orig() - purges all nc node data attached of the given * originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig_node with the nc node entries to be purged * @to_purge: function in charge to decide whether an entry has to be purged or * not. This function takes the nc node as argument and has to return @@ -389,7 +389,7 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv, /** * batadv_nc_purge_orig_hash() - traverse entire originator hash to check if * they have timed out nc nodes - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) { @@ -416,7 +416,7 @@ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) /** * batadv_nc_purge_paths() - traverse all nc paths part of the hash and remove * unused ones - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hash: hash table containing the nc paths to check * @to_purge: function in charge to decide whether an entry has to be purged or * not. 
This function takes the nc node as argument and has to return @@ -579,7 +579,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) /** * batadv_nc_sniffed_purge() - Checks timestamp of given sniffed nc_packet. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @nc_path: the nc path the packet belongs to * @nc_packet: the nc packet to be checked * @@ -618,7 +618,7 @@ out: /** * batadv_nc_fwd_flush() - Checks the timestamp of the given nc packet. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @nc_path: the nc path the packet belongs to * @nc_packet: the nc packet to be checked * @@ -657,7 +657,7 @@ static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, /** * batadv_nc_process_nc_paths() - traverse given nc packet pool and free timed * out nc packets - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hash: to be processed hash table * @process_fn: Function called to process given nc packet. Should return true * to encourage this function to proceed with the next packet. 
@@ -744,7 +744,7 @@ static void batadv_nc_worker(struct work_struct *work) /** * batadv_can_nc_with_orig() - checks whether the given orig node is suitable * for coding or not - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: neighboring orig node which may be used as nc candidate * @ogm_packet: incoming ogm packet also used for the checks * @@ -825,7 +825,7 @@ batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, /** * batadv_nc_get_nc_node() - retrieves an nc node or creates the entry if it was * not found - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node originating the ogm packet * @orig_neigh_node: neighboring orig node from which we received the ogm packet * (can be equal to orig_node) @@ -888,7 +888,7 @@ unlock: /** * batadv_nc_update_nc_node() - updates stored incoming and outgoing nc node * structs (best called on incoming OGMs) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node originating the ogm packet * @orig_neigh_node: neighboring orig node from which we received the ogm packet * (can be equal to orig_node) @@ -940,7 +940,7 @@ out: /** * batadv_nc_get_path() - get existing nc_path or allocate a new one - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hash: hash table containing the nc path * @src: ethernet source address - first half of the nc path search key * @dst: ethernet destination address - second half of the nc path search key @@ -1032,7 +1032,7 @@ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len) /** * batadv_nc_code_packets() - code a received unicast_packet with an nc packet * into a coded_packet and send it 
- * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: data skb to forward * @ethhdr: pointer to the ethernet header inside the skb * @nc_packet: structure containing the packet to the skb can be coded with @@ -1245,7 +1245,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src) /** * batadv_nc_path_search() - Find the coding path matching in_nc_node and * out_nc_node to retrieve a buffered packet that can be used for coding. - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @in_nc_node: pointer to skb next hop's neighbor nc node * @out_nc_node: pointer to skb source's neighbor nc node * @skb: data skb to forward @@ -1313,7 +1313,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv, /** * batadv_nc_skb_src_search() - Loops through the list of neighboring nodes of * the skb's sender (may be equal to the originator). 
- * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: data skb to forward * @eth_dst: next hop mac address of skb * @eth_src: source mac address of skb @@ -1359,7 +1359,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv, /** * batadv_nc_skb_store_before_coding() - set the ethernet src and dst of the * unicast skb before it is stored for use in later decoding - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: data skb to store * @eth_dst_new: new destination mac address of skb */ @@ -1408,7 +1408,7 @@ static bool batadv_nc_skb_dst_search(struct sk_buff *skb, struct batadv_neigh_node *neigh_node, struct ethhdr *ethhdr) { - struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct net_device *netdev = neigh_node->if_incoming->mesh_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_orig_node *orig_node = neigh_node->orig_node; struct batadv_nc_node *nc_node; @@ -1495,7 +1495,7 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, bool batadv_nc_skb_forward(struct sk_buff *skb, struct batadv_neigh_node *neigh_node) { - const struct net_device *netdev = neigh_node->if_incoming->soft_iface; + const struct net_device *netdev = neigh_node->if_incoming->mesh_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; @@ -1544,7 +1544,7 @@ out: /** * batadv_nc_skb_store_for_decoding() - save a clone of the skb which can be * used when decoding coded packets - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: data skb to store */ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, @@ -1605,7 +1605,7 @@ out: /** * batadv_nc_skb_store_sniffed_unicast() - check if a 
received unicast packet * should be saved in the decoding buffer and, if so, store it there - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: unicast skb to store */ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, @@ -1625,7 +1625,7 @@ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, /** * batadv_nc_skb_decode_packet() - decode given skb using the decode data stored * in nc_packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: unicast skb to decode * @nc_packet: decode data needed to decode the skb * @@ -1719,7 +1719,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_nc_find_decoding_packet() - search through buffered decoding data to * find the data needed to decode the coded packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @ethhdr: pointer to the ethernet header inside the coded packet * @coded: coded packet we try to find decode data for * @@ -1793,7 +1793,7 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_unicast_packet *unicast_packet; struct batadv_coded_packet *coded_packet; struct batadv_nc_packet *nc_packet; @@ -1858,7 +1858,7 @@ free_skb: /** * batadv_nc_mesh_free() - clean up network coding memory - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { diff --git 
a/net/batman-adv/originator.c b/net/batman-adv/originator.c index bcc2e20e0cd6..d9cfc5c6b208 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -47,7 +47,7 @@ static struct lock_class_key batadv_orig_hash_lock_class_key; /** * batadv_orig_hash_find() - Find and return originator from orig_hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @data: mac address of the originator * * Return: orig_node (with increased refcnt), NULL on errors @@ -213,7 +213,7 @@ void batadv_orig_node_vlan_release(struct kref *ref) /** * batadv_originator_init() - Initialize all originator structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of failure */ @@ -338,7 +338,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node, /** * batadv_orig_to_router() - get next hop neighbor to an orig address - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_addr: the originator MAC address to search the best next hop router for * @if_outgoing: the interface where the payload packet has been received or * the OGM should be sent to @@ -567,7 +567,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, const u8 *neigh_addr, struct batadv_orig_node *orig_node) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hardif_neigh_node *hardif_neigh; spin_lock_bh(&hard_iface->neigh_list_lock); @@ -754,20 +754,20 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if, *hard_iface; - struct net_device 
*soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; int ret; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { ret = -ENOENT; - goto out_put_soft_iface; + goto out_put_mesh_iface; } hard_iface = batadv_netlink_get_hardif(bat_priv, cb); @@ -794,8 +794,8 @@ out_put_hard_iface: batadv_hardif_put(hard_iface); out_put_primary_if: batadv_hardif_put(primary_if); -out_put_soft_iface: - dev_put(soft_iface); +out_put_mesh_iface: + dev_put(mesh_iface); return ret; } @@ -892,7 +892,7 @@ void batadv_orig_node_release(struct kref *ref) /** * batadv_originator_free() - Free all originator structures - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_originator_free(struct batadv_priv *bat_priv) { @@ -928,7 +928,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv) /** * batadv_orig_node_new() - creates a new orig_node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the originator * * Creates a new originator object and initialises all the generic fields. 
@@ -1009,7 +1009,7 @@ free_orig_node: /** * batadv_purge_neigh_ifinfo() - purge obsolete ifinfo entries from neighbor - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @neigh: orig node which is to be checked */ static void @@ -1050,7 +1050,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, /** * batadv_purge_orig_ifinfo() - purge obsolete ifinfo entries from originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked * * Return: true if any ifinfo entry was purged, false otherwise. @@ -1102,7 +1102,7 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, /** * batadv_purge_orig_neighbors() - purges neighbors from originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked * * Return: true if any neighbor was purged, false otherwise @@ -1160,7 +1160,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, /** * batadv_find_best_neighbor() - finds the best neighbor after purging - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked * @if_outgoing: the interface for which the metric should be compared * @@ -1194,7 +1194,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv, /** * batadv_purge_orig_node() - purges obsolete information from an orig_node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be checked * * This function checks if the orig_node or substructures of it have become @@ -1236,7 +1236,7 @@ static bool 
batadv_purge_orig_node(struct batadv_priv *bat_priv, if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -1258,7 +1258,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, /** * batadv_purge_orig_ref() - Purge all outdated originators - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_purge_orig_ref(struct batadv_priv *bat_priv) { @@ -1325,20 +1325,20 @@ static void batadv_purge_orig(struct work_struct *work) int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if, *hard_iface; - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; int ret; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { ret = -ENOENT; - goto out_put_soft_iface; + goto out_put_mesh_iface; } hard_iface = batadv_netlink_get_hardif(bat_priv, cb); @@ -1365,8 +1365,8 @@ out_put_hard_iface: batadv_hardif_put(hard_iface); out_put_primary_if: batadv_hardif_put(primary_if); -out_put_soft_iface: - dev_put(soft_iface); +out_put_mesh_iface: + dev_put(mesh_iface); return ret; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index f1061985149f..35d8c5783999 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -30,10 +30,10 @@ #include "fragmentation.h" #include "hard-interface.h" #include "log.h" +#include "mesh-interface.h" #include 
"network-coding.h" #include "originator.h" #include "send.h" -#include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" #include "tvlv.h" @@ -43,7 +43,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /** * _batadv_update_route() - set the router for this originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be configured * @recv_if: the receive interface for which this route is set * @neigh_node: neighbor which should be the next router @@ -106,7 +106,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, /** * batadv_update_route() - set the router for this originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which is to be configured * @recv_if: the receive interface for which this route is set * @neigh_node: neighbor which should be the next router @@ -133,7 +133,7 @@ out: /** * batadv_window_protected() - checks whether the host restarted and is in the * protection time. 
- * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @seq_num_diff: difference between the current/received sequence number and * the last sequence number * @seq_old_max_diff: maximum age of sequence number not considered as restart @@ -207,7 +207,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, /** * batadv_recv_my_icmp_packet() - receive an icmp packet locally - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: icmp packet to process * * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP @@ -338,7 +338,7 @@ out: int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_icmp_header *icmph; struct batadv_icmp_packet_rr *icmp_packet_rr; struct ethhdr *ethhdr; @@ -428,7 +428,7 @@ free_skb: /** * batadv_check_unicast_packet() - Check for malformed unicast packets - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: packet to check * @hdr_size: size of header to pull * @@ -511,7 +511,7 @@ batadv_last_bonding_replace(struct batadv_orig_node *orig_node, /** * batadv_find_router() - find a suitable router for this originator - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the destination node * @recv_if: pointer to interface this packet was received on * @@ -656,7 +656,7 @@ next: static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = 
netdev_priv(recv_if->mesh_iface); struct batadv_orig_node *orig_node = NULL; struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr = eth_hdr(skb); @@ -727,7 +727,7 @@ free_skb: /** * batadv_reroute_unicast_packet() - update the unicast header for re-routing - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: unicast packet to process * @unicast_packet: the unicast header to be updated * @dst_addr: the payload destination @@ -879,7 +879,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, return false; /* update the header in order to let the packet be delivered to this - * node's soft interface + * node's mesh interface */ primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -909,7 +909,7 @@ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { struct batadv_unicast_packet *unicast_packet; - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); int check, hdr_size = sizeof(*unicast_packet); check = batadv_check_unicast_packet(bat_priv, skb, hdr_size); @@ -938,7 +938,7 @@ free_skb: int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_unicast_packet *unicast_packet; struct batadv_unicast_4addr_packet *unicast_4addr_packet; u8 *orig_addr, *orig_addr_gw; @@ -1017,7 +1017,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, batadv_dat_snoop_incoming_dhcp_ack(bat_priv, skb, hdr_size); - batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, + batadv_interface_rx(recv_if->mesh_iface, skb, hdr_size, orig_node); rx_success: @@ -1047,7 +1047,7 @@ free_skb: int batadv_recv_unicast_tvlv(struct sk_buff *skb, struct 
batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; unsigned char *tvlv_buff; u16 tvlv_buff_len; @@ -1103,7 +1103,7 @@ free_skb: int batadv_recv_frag_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_orig_node *orig_node_src = NULL; struct batadv_frag_packet *frag_packet; int ret = NET_RX_DROP; @@ -1165,7 +1165,7 @@ free_skb: int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_orig_node *orig_node = NULL; struct batadv_bcast_packet *bcast_packet; struct ethhdr *ethhdr; @@ -1255,7 +1255,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, batadv_dat_snoop_incoming_dhcp_ack(bat_priv, skb, hdr_size); /* broadcast for me */ - batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node); + batadv_interface_rx(recv_if->mesh_iface, skb, hdr_size, orig_node); rx_success: ret = NET_RX_SUCCESS; @@ -1279,14 +1279,14 @@ out: * * Parses the given, received batman-adv multicast packet. Depending on the * contents of its TVLV forwards it and/or decapsulates it to hand it to the - * soft interface. + * mesh interface. * * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. 
*/ int batadv_recv_mcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_mcast_packet *mcast_packet; int hdr_size = sizeof(*mcast_packet); unsigned char *tvlv_buff; @@ -1329,7 +1329,7 @@ int batadv_recv_mcast_packet(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL_BYTES, skb->len - hdr_size); - batadv_interface_rx(bat_priv->soft_iface, skb, hdr_size, NULL); + batadv_interface_rx(bat_priv->mesh_iface, skb, hdr_size, NULL); /* skb was consumed */ skb = NULL; } diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 0379b126865d..735ac8077821 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -34,10 +34,10 @@ #include "gateway_client.h" #include "hard-interface.h" #include "log.h" +#include "mesh-interface.h" #include "network-coding.h" #include "originator.h" #include "routing.h" -#include "soft-interface.h" #include "translation-table.h" static void batadv_send_outstanding_bcast_packet(struct work_struct *work); @@ -68,7 +68,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct ethhdr *ethhdr; int ret; - bat_priv = netdev_priv(hard_iface->soft_iface); + bat_priv = netdev_priv(hard_iface->mesh_iface); if (hard_iface->if_status != BATADV_IF_ACTIVE) goto send_skb_err; @@ -272,7 +272,7 @@ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, /** * batadv_send_skb_prepare_unicast_4addr() - encapsulate an skb with a * unicast 4addr header - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb containing the payload to encapsulate * @orig: the destination node * @packet_subtype: the unicast 4addr packet subtype to use @@ -314,7 +314,7 @@ out: /** * batadv_send_skb_unicast() - encapsulate and send an skb via unicast - * @bat_priv: the bat priv with 
all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast @@ -384,7 +384,7 @@ out: /** * batadv_send_skb_via_tt_generic() - send an skb via TT lookup - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast @@ -430,7 +430,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, /** * batadv_send_skb_via_gw() - send an skb via gateway lookup - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @vid: the vid to be used to search the translation table * @@ -532,7 +532,7 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, forw_packet->queue_left = queue_left; forw_packet->if_incoming = if_incoming; forw_packet->if_outgoing = if_outgoing; - forw_packet->num_packets = 0; + forw_packet->num_packets = 1; return forw_packet; @@ -695,7 +695,7 @@ static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet, /** * batadv_forw_packet_bcast_queue() - try to queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent * @@ -714,7 +714,7 @@ batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv, /** * batadv_forw_packet_ogmv1_queue() - try to queue an OGMv1 packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface 
information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent * @@ -732,7 +732,7 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv, /** * batadv_forw_bcast_packet_to_list() - queue broadcast packet for transmissions - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -787,7 +787,7 @@ err: /** * batadv_forw_bcast_packet_if() - forward and queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -838,7 +838,7 @@ static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv, /** * batadv_send_no_broadcast() - check whether (re)broadcast is necessary - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to check * @own_packet: true if it is a self-generated broadcast packet * @if_out: the outgoing interface checked and considered for (re)broadcast @@ -900,7 +900,7 @@ static bool batadv_send_no_broadcast(struct batadv_priv *bat_priv, /** * __batadv_forw_bcast_packet() - forward and queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -930,7 +930,7 @@ static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv, rcu_read_lock(); 
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != bat_priv->soft_iface) + if (hard_iface->mesh_iface != bat_priv->mesh_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) @@ -958,7 +958,7 @@ static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv, /** * batadv_forw_bcast_packet() - forward and queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -979,7 +979,7 @@ int batadv_forw_bcast_packet(struct batadv_priv *bat_priv, /** * batadv_send_bcast_packet() - send and queue a broadcast packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet @@ -1060,7 +1060,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); - bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); + bat_priv = netdev_priv(forw_packet->if_incoming->mesh_iface); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { dropped = true; @@ -1095,7 +1095,7 @@ out: /** * batadv_purge_outstanding_packets() - stop/purge scheduled bcast/OGMv1 packets - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @hard_iface: the hard interface to cancel and purge bcast/ogm packets on * * This method cancels and purges any broadcast and OGMv1 packet on the given diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h 
index 08af251b765c..3415afec4a0c 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -68,7 +68,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_send_skb_via_tt() - send an skb via TT lookup - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the payload to send * @dst_hint: can be used to override the destination contained in the skb * @vid: the vid to be used to search the translation table @@ -89,7 +89,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, /** * batadv_send_skb_via_tt_4addr() - send an skb via TT lookup - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the payload to send * @packet_subtype: the unicast 4addr packet subtype to use * @dst_hint: can be used to override the destination contained in the skb diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 7f3dd3c393e0..9fb14e40e156 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -206,7 +206,7 @@ static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars, * batadv_tp_batctl_notify() - send client status result to client * @reason: reason for tp meter session stop * @dst: destination of tp_meter session - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @start_time: start of transmission in jiffies * @total_sent: bytes acked to the receiver * @cookie: cookie of tp_meter session @@ -238,7 +238,7 @@ static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason, * batadv_tp_batctl_error_notify() - send client error result to client * @reason: reason for tp meter session stop * @dst: destination of tp_meter session - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: 
the bat priv with all the mesh interface information * @cookie: cookie of tp_meter session */ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, @@ -251,7 +251,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, /** * batadv_tp_list_find() - find a tp_vars object in the global list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for * * Look for a tp_vars object matching dst as end_point and return it after @@ -287,7 +287,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, /** * batadv_tp_list_find_session() - find tp_vars session object in the global * list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for * @session: session identifier * @@ -366,7 +366,7 @@ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars) /** * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tp_vars: the private data of the current TP meter session to cleanup */ static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, @@ -396,7 +396,7 @@ static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, /** * batadv_tp_sender_end() - print info about ended session and inform client - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tp_vars: the private data of the current TP meter session */ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, @@ -619,7 +619,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src, /** * batadv_tp_recv_ack() 
- ACK receiving function - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the buffer containing the received packet * * Process a received TP ACK packet @@ -832,7 +832,7 @@ static int batadv_tp_send(void *arg) } /* assume that all the hard_interfaces have a correctly - * configured MTU, so use the soft_iface MTU as MSS. + * configured MTU, so use the mesh_iface MTU as MSS. * This might not be true and in that case the fragmentation * should be used. * Now, try to send the packet as it is @@ -927,7 +927,7 @@ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars) /** * batadv_tp_start() - start a new tp meter session - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the receiver MAC address * @test_length: test length in milliseconds * @cookie: session cookie @@ -993,7 +993,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, /* initialise the CWND to 3*MSS (Section 3.1 in RFC5681). 
* For batman-adv the MSS is the size of the payload received by the - * soft_interface, hence its MTU + * mesh_interface, hence its MTU */ tp_vars->cwnd = BATADV_TP_PLEN * 3; /* at the beginning initialise the SS threshold to the biggest possible @@ -1052,7 +1052,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, /** * batadv_tp_stop() - stop currently running tp meter session - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the receiver MAC address * @return_value: reason for tp meter session stop */ @@ -1141,7 +1141,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) /** * batadv_tp_send_ack() - send an ACK packet - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst: the mac address of the destination originator * @seq: the sequence number to ACK * @timestamp: the timestamp to echo back in the ACK @@ -1320,7 +1320,7 @@ static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars) /** * batadv_tp_init_recv() - return matching or create new receiver tp_vars - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @icmp: received icmp tp msg * * Return: corresponding tp_vars or NULL on errors @@ -1373,7 +1373,7 @@ out_unlock: /** * batadv_tp_recv_msg() - process a single data message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the buffer containing the received packet * * Process a received TP MSG packet @@ -1457,7 +1457,7 @@ out: /** * batadv_tp_meter_recv() - main TP Meter receiving function - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @skb: the buffer containing the 
received packet */ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index 6b816cf1a953..7da692ec38e9 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -34,7 +34,7 @@ TRACE_EVENT(batadv_dbg, TP_ARGS(bat_priv, vaf), TP_STRUCT__entry( - __string(device, bat_priv->soft_iface->name) + __string(device, bat_priv->mesh_iface->name) __string(driver, KBUILD_MODNAME) __vstring(msg, vaf->fmt, vaf->va) ), diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d4b71d34310f..4a3165920de1 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -47,9 +47,9 @@ #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "mesh-interface.h" #include "netlink.h" #include "originator.h" -#include "soft-interface.h" #include "tvlv.h" static struct kmem_cache *batadv_tl_cache __read_mostly; @@ -161,7 +161,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr, /** * batadv_tt_local_hash_find() - search the local table for a given client - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to look for * @vid: VLAN identifier * @@ -186,7 +186,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr, /** * batadv_tt_global_hash_find() - search the global table for a given client - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to look for * @vid: VLAN identifier * @@ -221,7 +221,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) tt_local_entry = container_of(ref, struct batadv_tt_local_entry, common.refcount); - batadv_softif_vlan_put(tt_local_entry->vlan); + batadv_meshif_vlan_put(tt_local_entry->vlan); 
kfree_rcu(tt_local_entry, common.rcu); } @@ -260,7 +260,7 @@ void batadv_tt_global_entry_release(struct kref *ref) /** * batadv_tt_global_hash_count() - count the number of orig entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to count entries for * @vid: VLAN identifier * @@ -286,28 +286,28 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, /** * batadv_tt_local_size_mod() - change the size by v of the local table * identified by vid - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier of the sub-table to change * @v: the amount to sum to the local table size */ static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv, unsigned short vid, int v) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) return; atomic_add(v, &vlan->tt.num_entries); - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); } /** * batadv_tt_local_size_inc() - increase by one the local table size for the * given vid - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier */ static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv, @@ -319,7 +319,7 @@ static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv, /** * batadv_tt_local_size_dec() - decrease by one the local table size for the * given vid - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: the VLAN identifier */ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv, @@ -412,7 +412,7 @@ 
batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry) /** * batadv_tt_local_event() - store a local TT event (ADD/DEL) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_local_entry: the TT entry involved in the event * @event_flags: flags to store in the event structure */ @@ -504,7 +504,7 @@ static u16 batadv_tt_entries(u16 tt_len) /** * batadv_tt_local_table_transmit_size() - calculates the local translation * table size when transmitted over the air - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: local translation table size in bytes. */ @@ -512,11 +512,11 @@ static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv) { u16 num_vlan = 0; u16 tt_local_entries = 0; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; int hdr_size; rcu_read_lock(); - hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry_rcu(vlan, &bat_priv->meshif_vlan_list, list) { num_vlan++; tt_local_entries += atomic_read(&vlan->tt.num_entries); } @@ -576,7 +576,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, /** * batadv_tt_local_add() - add a new client to the local table or update an * existing client - * @soft_iface: netdev struct of the mesh interface + * @mesh_iface: netdev struct of the mesh interface * @addr: the mac address of the client to add * @vid: VLAN identifier * @ifindex: index of the interface where the client is connected to (useful to @@ -586,14 +586,14 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, * * Return: true if the client was successfully added, false otherwise. 
*/ -bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, +bool batadv_tt_local_add(struct net_device *mesh_iface, const u8 *addr, unsigned short vid, int ifindex, u32 mark) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global = NULL; - struct net *net = dev_net(soft_iface); - struct batadv_softif_vlan *vlan; + struct net *net = dev_net(mesh_iface); + struct batadv_meshif_vlan *vlan; struct net_device *in_dev = NULL; struct batadv_hard_iface *in_hardif = NULL; struct hlist_head *head; @@ -650,7 +650,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, table_size += batadv_tt_len(1); packet_size_max = atomic_read(&bat_priv->packet_size_max); if (table_size > packet_size_max) { - net_ratelimited_function(batadv_info, soft_iface, + net_ratelimited_function(batadv_info, mesh_iface, "Local translation table size (%i) exceeds maximum packet size (%i); Ignoring new local tt entry: %pM\n", table_size, packet_size_max, addr); goto out; @@ -661,9 +661,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, goto out; /* increase the refcounter of the related vlan */ - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) { - net_ratelimited_function(batadv_info, soft_iface, + net_ratelimited_function(batadv_info, mesh_iface, "adding TT local entry %pM to non-existent VLAN %d\n", addr, batadv_print_vid(vid)); kmem_cache_free(batadv_tl_cache, tt_local); @@ -693,7 +693,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, /* the batman interface mac and multicast addresses should never be * purged */ - if (batadv_compare_eth(addr, soft_iface->dev_addr) || + if (batadv_compare_eth(addr, mesh_iface->dev_addr) || is_multicast_ether_addr(addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; @@ -849,7 
+849,7 @@ out: /** * batadv_tt_prepare_tvlv_local_data() - allocate and prepare the TT TVLV for * this node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: uninitialised pointer to the address of the TVLV buffer * @tt_change: uninitialised pointer to the address of the area where the TT * changes can be stored @@ -871,7 +871,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, s32 *tt_len) { struct batadv_tvlv_tt_vlan_data *tt_vlan; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; u16 num_vlan = 0; u16 vlan_entries = 0; u16 total_entries = 0; @@ -879,8 +879,8 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, u8 *tt_change_ptr; int change_offset; - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { + spin_lock_bh(&bat_priv->meshif_vlan_list_lock); + hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) continue; @@ -909,7 +909,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (*tt_data)->vlan_data; - hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) continue; @@ -925,14 +925,14 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + spin_unlock_bh(&bat_priv->meshif_vlan_list_lock); return tvlv_len; } /** * batadv_tt_tvlv_container_update() - update the translation table tvlv * container after local tt changes have been committed - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv 
with all the mesh interface information */ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) { @@ -956,7 +956,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) * The local change history should still be cleaned up so the next * TT round can start again with a clean state. */ - if (tt_diff_len > bat_priv->soft_iface->mtu) { + if (tt_diff_len > bat_priv->mesh_iface->mtu) { tt_diff_len = 0; tt_diff_entries_num = 0; drop_changes = true; @@ -1025,7 +1025,7 @@ container_register: * @msg :Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @common: tt local & tt global common data * * Return: Error code, or 0 on success @@ -1037,7 +1037,7 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, struct batadv_tt_common_entry *common) { void *hdr; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; struct batadv_tt_local_entry *local; unsigned int last_seen_msecs; u32 crc; @@ -1045,13 +1045,13 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, local = container_of(common, struct batadv_tt_local_entry, common); last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen); - vlan = batadv_softif_vlan_get(bat_priv, common->vid); + vlan = batadv_meshif_vlan_get(bat_priv, common->vid); if (!vlan) return 0; crc = vlan->tt.crc; - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, &batadv_netlink_family, NLM_F_MULTI, @@ -1084,7 +1084,7 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the 
mesh interface information * @hash: hash to dump * @bucket: bucket index to dump * @idx_s: Number of entries to skip @@ -1130,7 +1130,7 @@ batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, */ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) { - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_hashtable *hash; @@ -1139,11 +1139,11 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) int idx = cb->args[1]; int portid = NETLINK_CB(cb->skb).portid; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { @@ -1165,7 +1165,7 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); cb->args[0] = bucket; cb->args[1] = idx; @@ -1194,7 +1194,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, /** * batadv_tt_local_remove() - logically remove an entry from the local table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the MAC address of the client to remove * @vid: VLAN identifier * @message: message to append to the log on deletion @@ -1259,7 +1259,7 @@ out: /** * batadv_tt_local_purge_list() - purge inactive tt local entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @head: pointer to the list containing the local tt entries * @timeout: parameter deciding whether a given tt 
local entry is considered * inactive or not @@ -1294,7 +1294,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, /** * batadv_tt_local_purge() - purge inactive tt local entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @timeout: parameter deciding whether a given tt local entry is considered * inactive or not */ @@ -1529,7 +1529,7 @@ out: /** * batadv_tt_global_add() - add a new TT global entry or update an existing one - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the originator announcing the client * @tt_addr: the mac address of the non-mesh client * @vid: VLAN identifier @@ -1702,7 +1702,7 @@ out: /** * batadv_transtable_best_orig() - Get best originator list entry from tt entry - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_global_entry: global translation table entry to be analyzed * * This function assumes the caller holds rcu_read_lock(). 
@@ -1809,7 +1809,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @common: tt local & tt global common data * @sub_s: Number of entries to skip * @@ -1854,7 +1854,7 @@ batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message - * @bat_priv: The bat priv with all the soft interface information + * @bat_priv: The bat priv with all the mesh interface information * @head: Pointer to the list containing the global tt entries * @idx_s: Number of entries to skip * @sub: Number of entries to skip @@ -1897,7 +1897,7 @@ batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, */ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) { - struct net_device *soft_iface; + struct net_device *mesh_iface; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_hashtable *hash; @@ -1908,11 +1908,11 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) int sub = cb->args[2]; int portid = NETLINK_CB(cb->skb).portid; - soft_iface = batadv_netlink_get_softif(cb); - if (IS_ERR(soft_iface)) - return PTR_ERR(soft_iface); + mesh_iface = batadv_netlink_get_meshif(cb); + if (IS_ERR(mesh_iface)) + return PTR_ERR(mesh_iface); - bat_priv = netdev_priv(soft_iface); + bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { @@ -1937,7 +1937,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) out: batadv_hardif_put(primary_if); - dev_put(soft_iface); + dev_put(mesh_iface); 
cb->args[0] = bucket; cb->args[1] = idx; @@ -1990,7 +1990,7 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) /** * batadv_tt_global_del_orig_node() - remove orig_node from a global tt entry - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_global_entry: the global entry to remove the orig_node from * @orig_node: the originator announcing the client * @message: message to append to the log on deletion @@ -2069,7 +2069,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, /** * batadv_tt_global_del() - remove a client from the global table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: an originator serving this client * @addr: the mac address of the client * @vid: VLAN identifier @@ -2134,7 +2134,7 @@ out: /** * batadv_tt_global_del_orig() - remove all the TT global entries belonging to * the given originator matching the provided vid - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the originator owning the entries to remove * @match_vid: the VLAN identifier to match. 
If negative all the entries will be * removed @@ -2305,7 +2305,7 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, /** * batadv_transtable_search() - get the mesh destination for a given client - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: mac address of the source client * @addr: mac address of the destination client * @vid: VLAN identifier @@ -2364,7 +2364,7 @@ out: /** * batadv_tt_global_crc() - calculates the checksum of the local table belonging * to the given orig_node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: originator for which the CRC should be computed * @vid: VLAN identifier for which the CRC32 has to be computed * @@ -2458,7 +2458,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, /** * batadv_tt_local_crc() - calculates the checksum of the local table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @vid: VLAN identifier for which the CRC32 has to be computed * * For details about the computation, please refer to the documentation for @@ -2593,7 +2593,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) /** * batadv_tt_req_node_new() - search and possibly create a tt_req_node object - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node this request is being issued for * * Return: the pointer to the new tt_req_node struct if no request @@ -2689,7 +2689,7 @@ static bool batadv_tt_global_valid(const void *entry_ptr, /** * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from the * specified tt hash - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv 
with all the mesh interface information * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes * @tvlv_buff: pointer to the buffer to fill with the TT data @@ -2810,15 +2810,15 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, /** * batadv_tt_local_update_crc() - update all the local CRCs - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv) { - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; /* recompute the global CRC for each VLAN */ rcu_read_lock(); - hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry_rcu(vlan, &bat_priv->meshif_vlan_list, list) { vlan->tt.crc = batadv_tt_local_crc(bat_priv, vlan->vid); } rcu_read_unlock(); @@ -2826,7 +2826,7 @@ static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv) /** * batadv_tt_global_update_crc() - update all the global CRCs for this orig_node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the orig_node for which the CRCs have to be updated */ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, @@ -2853,7 +2853,7 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, /** * batadv_send_tt_request() - send a TT Request message to a given node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @dst_orig_node: the destination of the message * @ttvn: the version number that the source of the message is looking for * @tt_vlan: pointer to the first tvlv VLAN object to request @@ -2938,7 +2938,7 @@ out: /** * batadv_send_other_tt_response() - send reply to tt request concerning another * node's 
translation table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient @@ -3029,7 +3029,7 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, /* Don't send the response, if larger than fragmented packet. */ tt_len = sizeof(struct batadv_unicast_tvlv_packet) + tvlv_len; if (tt_len > atomic_read(&bat_priv->packet_size_max)) { - net_ratelimited_function(batadv_info, bat_priv->soft_iface, + net_ratelimited_function(batadv_info, bat_priv->mesh_iface, "Ignoring TT_REQUEST from %pM; Response size exceeds max packet size.\n", res_dst_orig_node->orig); goto out; @@ -3068,7 +3068,7 @@ out: /** * batadv_send_my_tt_response() - send reply to tt request concerning this * node's translation table - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * @@ -3185,7 +3185,7 @@ out: /** * batadv_send_tt_response() - send reply to tt request - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient @@ -3280,7 +3280,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, /** * batadv_is_my_client() - check if a client is served by the local node - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to check * @vid: VLAN identifier * @@ -3309,7 +3309,7 @@ out: /** * batadv_handle_tt_response() 
- process incoming tt reply - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tt_data: tt data containing the tt request information * @resp_src: mac address of tt reply sender * @num_entries: number of tt change entries appended to the tt data @@ -3397,7 +3397,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) /** * batadv_tt_check_roam_count() - check if a client has roamed too frequently - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @client: mac address of the roaming client * * This function checks whether the client already reached the @@ -3452,7 +3452,7 @@ unlock: /** * batadv_send_roam_adv() - send a roaming advertisement message - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @client: mac address of the roaming client * @vid: VLAN identifier * @orig_node: message destination @@ -3516,8 +3516,8 @@ static void batadv_tt_purge(struct work_struct *work) } /** - * batadv_tt_free() - Free translation table of soft interface - * @bat_priv: the bat priv with all the soft interface information + * batadv_tt_free() - Free translation table of mesh interface + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_tt_free(struct batadv_priv *bat_priv) { @@ -3540,7 +3540,7 @@ void batadv_tt_free(struct batadv_priv *bat_priv) /** * batadv_tt_local_set_flags() - set or unset the specified flags on the local * table and possibly count them in the TT size - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @flags: the flag to switch * @enable: whether to set or unset the flag * @count: whether to increase the TT size by the number of changed entries @@ -3626,7 +3626,7 @@ static 
void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) /** * batadv_tt_local_commit_changes_nolock() - commit all pending local tt changes * which have been queued in the time since the last commit - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Caller must hold tt->commit_lock. */ @@ -3659,7 +3659,7 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) /** * batadv_tt_local_commit_changes() - commit all pending local tt changes which * have been queued in the time since the last commit - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information */ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) { @@ -3670,7 +3670,7 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) /** * batadv_is_ap_isolated() - Check if packet from upper layer should be dropped - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: source mac address of packet * @dst: destination mac address of packet * @vid: vlan id of packet @@ -3682,10 +3682,10 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, { struct batadv_tt_local_entry *tt_local_entry; struct batadv_tt_global_entry *tt_global_entry; - struct batadv_softif_vlan *vlan; + struct batadv_meshif_vlan *vlan; bool ret = false; - vlan = batadv_softif_vlan_get(bat_priv, vid); + vlan = batadv_meshif_vlan_get(bat_priv, vid); if (!vlan) return false; @@ -3707,14 +3707,14 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, local_entry_put: batadv_tt_local_entry_put(tt_local_entry); vlan_put: - batadv_softif_vlan_put(vlan); + batadv_meshif_vlan_put(vlan); return ret; } /** * batadv_tt_update_orig() - update global translation table with new tt * information received 
via ogms - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: the orig_node of the ogm * @tt_buff: pointer to the first tvlv VLAN entry * @tt_num_vlan: number of tvlv VLAN entries @@ -3798,7 +3798,7 @@ request_table: /** * batadv_tt_global_client_is_roaming() - check if a client is marked as roaming - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client to check * @vid: VLAN identifier * @@ -3824,7 +3824,7 @@ out: /** * batadv_tt_local_client_is_roaming() - tells whether the client is roaming - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the local client to query * @vid: VLAN identifier * @@ -3850,7 +3850,7 @@ out: /** * batadv_tt_add_temporary_global_entry() - Add temporary entry to global TT - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig_node: orig node which the temporary entry should be associated with * @addr: mac address of the client * @vid: VLAN id of the new temporary global translation table @@ -3883,14 +3883,14 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, /** * batadv_tt_local_resize_to_mtu() - resize the local translation table fit the * maximum packet size that can be transported through the mesh - * @soft_iface: netdev struct of the mesh interface + * @mesh_iface: netdev struct of the mesh interface * * Remove entries older than 'timeout' and half timeout if more entries need * to be removed. 
*/ -void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) +void batadv_tt_local_resize_to_mtu(struct net_device *mesh_iface) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(mesh_iface); int packet_size_max = atomic_read(&bat_priv->packet_size_max); int table_size, timeout = BATADV_TT_LOCAL_TIMEOUT / 2; bool reduced = false; @@ -3907,7 +3907,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) timeout /= 2; reduced = true; - net_ratelimited_function(batadv_info, soft_iface, + net_ratelimited_function(batadv_info, mesh_iface, "Forced to purge local tt entries to fit new maximum fragment MTU (%i)\n", packet_size_max); } @@ -3923,7 +3923,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) /** * batadv_tt_tvlv_ogm_handler_v1() - process incoming tt tvlv container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data @@ -3962,7 +3962,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /** * batadv_tt_tvlv_unicast_handler_v1() - process incoming (unicast) tt tvlv * container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: mac address of tt tvlv sender * @dst: mac address of tt tvlv recipient * @tvlv_value: tvlv buffer containing the tt data @@ -4044,7 +4044,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, /** * batadv_roam_tvlv_unicast_handler_v1() - process incoming tt roam tvlv * container - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: mac address of tt tvlv sender * @dst: mac 
address of tt tvlv recipient * @tvlv_value: tvlv buffer containing the tt data @@ -4093,7 +4093,7 @@ out: /** * batadv_tt_init() - initialise the translation table internals - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Return: 0 on success or negative error number in case of failure. */ @@ -4131,7 +4131,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv) /** * batadv_tt_global_is_isolated() - check if a client is marked as isolated - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @addr: the mac address of the client * @vid: the identifier of the VLAN where this client is connected * diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index d18740d9a22b..618d9dbca5ea 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -16,7 +16,7 @@ #include <linux/types.h> int batadv_tt_init(struct batadv_priv *bat_priv); -bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, +bool batadv_tt_local_add(struct net_device *mesh_iface, const u8 *addr, unsigned short vid, int ifindex, u32 mark); u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, @@ -45,7 +45,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, u8 *addr, unsigned short vid); bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, u8 *addr, unsigned short vid); -void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface); +void batadv_tt_local_resize_to_mtu(struct net_device *mesh_iface); bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 2a583215d439..76dff1f9c559 100644 --- a/net/batman-adv/tvlv.c +++ 
b/net/batman-adv/tvlv.c @@ -59,7 +59,7 @@ static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler) /** * batadv_tvlv_handler_get() - retrieve tvlv handler from the tvlv handler list * based on the provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv handler type to look for * @version: tvlv handler version to look for * @@ -118,7 +118,7 @@ static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv) /** * batadv_tvlv_container_get() - retrieve tvlv container from the tvlv container * list based on the provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv container type to look for * @version: tvlv container version to look for * @@ -152,7 +152,7 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) /** * batadv_tvlv_container_list_size() - calculate the size of the tvlv container * list entries - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * * Has to be called with the appropriate locks being acquired * (tvlv.container_list_lock). 
@@ -177,7 +177,7 @@ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) /** * batadv_tvlv_container_remove() - remove tvlv container from the tvlv * container list - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @tvlv: the to be removed tvlv container * * Has to be called with the appropriate locks being acquired @@ -201,7 +201,7 @@ static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv, /** * batadv_tvlv_container_unregister() - unregister tvlv container based on the * provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv container type to unregister * @version: tvlv container type to unregister */ @@ -219,7 +219,7 @@ void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, /** * batadv_tvlv_container_register() - register tvlv type, version and content * to be propagated with each (primary interface) OGM - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv container type * @version: tvlv container version * @tvlv_value: tvlv container content @@ -297,7 +297,7 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, /** * batadv_tvlv_container_ogm_append() - append tvlv container content to given * OGM packet buffer - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @packet_buff: ogm packet buffer * @packet_buff_len: ogm packet buffer size including ogm header and tvlv * content @@ -350,7 +350,7 @@ end: /** * batadv_tvlv_call_handler() - parse the given tvlv buffer to call the * appropriate handlers - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat 
priv with all the mesh interface information * @tvlv_handler: tvlv callback function handling the tvlv content * @packet_type: indicates for which packet type the TVLV handler is called * @orig_node: orig node emitting the ogm packet @@ -421,7 +421,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, /** * batadv_tvlv_containers_process() - parse the given tvlv buffer to call the * appropriate handlers - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @packet_type: indicates for which packet type the TVLV handler is called * @orig_node: orig node emitting the ogm packet * @skb: the skb the TVLV handler is called for @@ -490,7 +490,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, /** * batadv_tvlv_ogm_receive() - process an incoming ogm and call the appropriate * handlers - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @batadv_ogm_packet: ogm packet containing the tvlv containers * @orig_node: orig node emitting the ogm packet */ @@ -518,7 +518,7 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, * batadv_tvlv_handler_register() - register tvlv handler based on the provided * type and version (both need to match) for ogm tvlv payload and/or unicast * payload - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @optr: ogm tvlv handler callback function. This function receives the orig * node, flags and the tvlv content as argument to process. * @uptr: unicast tvlv handler callback function. 
This function receives the @@ -583,7 +583,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, /** * batadv_tvlv_handler_unregister() - unregister tvlv handler based on the * provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @type: tvlv handler type to be unregistered * @version: tvlv handler version to be unregistered */ @@ -606,7 +606,7 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, /** * batadv_tvlv_unicast_send() - send a unicast packet with tvlv payload to the * specified host - * @bat_priv: the bat priv with all the soft interface information + * @bat_priv: the bat priv with all the mesh interface information * @src: source mac address of the unicast packet * @dst: destination mac address of the unicast packet * @type: tvlv type diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index fe89f08533fe..0ca0fc072fc9 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -186,6 +186,9 @@ struct batadv_hard_iface { /** @net_dev: pointer to the net_device */ struct net_device *net_dev; + /** @dev_tracker: device tracker for @net_dev */ + netdevice_tracker dev_tracker; + /** @refcount: number of contexts the object is used */ struct kref refcount; @@ -196,10 +199,13 @@ struct batadv_hard_iface { struct packet_type batman_adv_ptype; /** - * @soft_iface: the batman-adv interface which uses this network + * @mesh_iface: the batman-adv interface which uses this network * interface */ - struct net_device *soft_iface; + struct net_device *mesh_iface; + + /** @meshif_dev_tracker: device tracker for @mesh_iface */ + netdevice_tracker meshif_dev_tracker; /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; @@ -487,7 +493,7 @@ struct batadv_orig_node { /** @hash_entry: hlist node for &batadv_priv.orig_hash */ struct hlist_node hash_entry; - /** @bat_priv: 
pointer to soft_iface this orig node belongs to */ + /** @bat_priv: pointer to mesh_iface this orig node belongs to */ struct batadv_priv *bat_priv; /** @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno */ @@ -899,13 +905,13 @@ enum batadv_counters { /** * @BATADV_CNT_MCAST_RX_LOCAL: counter for received batman-adv multicast - * packets which were forwarded to the local soft interface + * packets which were forwarded to the local mesh interface */ BATADV_CNT_MCAST_RX_LOCAL, /** * @BATADV_CNT_MCAST_RX_LOCAL_BYTES: bytes counter for received - * batman-adv multicast packets which were forwarded to the local soft + * batman-adv multicast packets which were forwarded to the local mesh * interface */ BATADV_CNT_MCAST_RX_LOCAL_BYTES, @@ -1137,29 +1143,6 @@ struct batadv_priv_bla { }; #endif -#ifdef CONFIG_BATMAN_ADV_DEBUG - -/** - * struct batadv_priv_debug_log - debug logging data - */ -struct batadv_priv_debug_log { - /** @log_buff: buffer holding the logs (ring buffer) */ - char log_buff[BATADV_LOG_BUF_LEN]; - - /** @log_start: index of next character to read */ - unsigned long log_start; - - /** @log_end: index of next character to write */ - unsigned long log_end; - - /** @lock: lock protecting log_buff, log_start & log_end */ - spinlock_t lock; - - /** @queue_wait: log reader's wait queue */ - wait_queue_head_t queue_wait; -}; -#endif - /** * struct batadv_priv_gw - per mesh interface gateway data */ @@ -1264,7 +1247,7 @@ struct batadv_mcast_mla_flags { /** @enabled: whether the multicast tvlv is currently enabled */ unsigned char enabled:1; - /** @bridged: whether the soft interface has a bridge on top */ + /** @bridged: whether the mesh interface has a bridge on top */ unsigned char bridged:1; /** @tvlv_flags: the flags we have last sent in our mcast tvlv */ @@ -1400,7 +1383,7 @@ struct batadv_priv_nc { /** * @decoding_hash: Hash table used to buffer skbs that might be needed * to decode a received coded skb. 
The buffer is used for 1) skbs - * arriving on the soft-interface; 2) skbs overheard on the + * arriving on the mesh-interface; 2) skbs overheard on the * hard-interface; and 3) skbs forwarded by batman-adv. */ struct batadv_hashtable *decoding_hash; @@ -1553,9 +1536,9 @@ struct batadv_tp_vars { }; /** - * struct batadv_softif_vlan - per VLAN attributes set + * struct batadv_meshif_vlan - per VLAN attributes set */ -struct batadv_softif_vlan { +struct batadv_meshif_vlan { /** @bat_priv: pointer to the mesh object */ struct batadv_priv *bat_priv; @@ -1568,7 +1551,7 @@ struct batadv_softif_vlan { /** @tt: TT private attributes (VLAN specific) */ struct batadv_vlan_tt tt; - /** @list: list node for &bat_priv.softif_vlan_list */ + /** @list: list node for &bat_priv.meshif_vlan_list */ struct hlist_node list; /** @@ -1581,7 +1564,7 @@ struct batadv_softif_vlan { }; /** - * struct batadv_priv_bat_v - B.A.T.M.A.N. V per soft-interface private data + * struct batadv_priv_bat_v - B.A.T.M.A.N. V per mesh-interface private data */ struct batadv_priv_bat_v { /** @ogm_buff: buffer holding the OGM packet */ @@ -1610,8 +1593,8 @@ struct batadv_priv { */ atomic_t mesh_state; - /** @soft_iface: net device which holds this struct as private data */ - struct net_device *soft_iface; + /** @mesh_iface: net device which holds this struct as private data */ + struct net_device *mesh_iface; /** * @mtu_set_by_user: MTU was set once by user @@ -1760,24 +1743,19 @@ struct batadv_priv { struct batadv_algo_ops *algo_ops; /** - * @softif_vlan_list: a list of softif_vlan structs, one per VLAN + * @meshif_vlan_list: a list of meshif_vlan structs, one per VLAN * created on top of the mesh interface represented by this object */ - struct hlist_head softif_vlan_list; + struct hlist_head meshif_vlan_list; - /** @softif_vlan_list_lock: lock protecting softif_vlan_list */ - spinlock_t softif_vlan_list_lock; + /** @meshif_vlan_list_lock: lock protecting meshif_vlan_list */ + spinlock_t 
meshif_vlan_list_lock; #ifdef CONFIG_BATMAN_ADV_BLA /** @bla: bridge loop avoidance data */ struct batadv_priv_bla bla; #endif -#ifdef CONFIG_BATMAN_ADV_DEBUG - /** @debug_log: holding debug logging relevant data */ - struct batadv_priv_debug_log *debug_log; -#endif - /** @gw: gateway data */ struct batadv_priv_gw gw; @@ -1808,7 +1786,7 @@ struct batadv_priv { #endif /* CONFIG_BATMAN_ADV_NC */ #ifdef CONFIG_BATMAN_ADV_BATMAN_V - /** @bat_v: B.A.T.M.A.N. V per soft-interface private data */ + /** @bat_v: B.A.T.M.A.N. V per mesh-interface private data */ struct batadv_priv_bat_v bat_v; #endif }; @@ -1831,7 +1809,7 @@ struct batadv_bla_backbone_gw { /** @hash_entry: hlist node for &batadv_priv_bla.backbone_hash */ struct hlist_node hash_entry; - /** @bat_priv: pointer to soft_iface this backbone gateway belongs to */ + /** @bat_priv: pointer to mesh_iface this backbone gateway belongs to */ struct batadv_priv *bat_priv; /** @lasttime: last time we heard of this backbone gw */ @@ -1936,8 +1914,8 @@ struct batadv_tt_local_entry { /** @last_seen: timestamp used for purging stale tt local entries */ unsigned long last_seen; - /** @vlan: soft-interface vlan of the entry */ - struct batadv_softif_vlan *vlan; + /** @vlan: mesh-interface vlan of the entry */ + struct batadv_meshif_vlan *vlan; }; /** @@ -2161,7 +2139,7 @@ struct batadv_forw_packet { u16 packet_len; /** @direct_link_flags: direct link flags for aggregated OGM packets */ - u32 direct_link_flags; + DECLARE_BITMAP(direct_link_flags, BATADV_MAX_AGGREGATION_PACKETS); /** @num_packets: counter for aggregated OGMv1 packets */ u8 num_packets; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 3c29778171c5..f0c862091bff 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -13,6 +13,7 @@ #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/addrconf.h> +#include <net/netdev_lock.h> #include <net/pkt_sched.h> #include <net/bluetooth/bluetooth.h> @@ -443,7 +444,7 @@ static int 
send_pkt(struct l2cap_chan *chan, struct sk_buff *skb, memset(&msg, 0, sizeof(msg)); iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iv, 1, skb->len); - err = l2cap_chan_send(chan, &msg, skb->len); + err = l2cap_chan_send(chan, &msg, skb->len, NULL); if (err > 0) { netdev->stats.tx_bytes += err; netdev->stats.tx_packets++; diff --git a/net/bluetooth/coredump.c b/net/bluetooth/coredump.c index c18df3a08607..819eacb38762 100644 --- a/net/bluetooth/coredump.c +++ b/net/bluetooth/coredump.c @@ -240,6 +240,26 @@ static void hci_devcd_handle_pkt_pattern(struct hci_dev *hdev, bt_dev_dbg(hdev, "Failed to set pattern"); } +static void hci_devcd_dump(struct hci_dev *hdev) +{ + struct sk_buff *skb; + u32 size; + + bt_dev_dbg(hdev, "state %d", hdev->dump.state); + + size = hdev->dump.tail - hdev->dump.head; + + /* Emit a devcoredump with the available data */ + dev_coredumpv(&hdev->dev, hdev->dump.head, size, GFP_KERNEL); + + /* Send a copy to monitor as a diagnostic packet */ + skb = bt_skb_alloc(size, GFP_ATOMIC); + if (skb) { + skb_put_data(skb, hdev->dump.head, size); + hci_recv_diag(hdev, skb); + } +} + static void hci_devcd_handle_pkt_complete(struct hci_dev *hdev, struct sk_buff *skb) { @@ -256,7 +276,7 @@ static void hci_devcd_handle_pkt_complete(struct hci_dev *hdev, bt_dev_dbg(hdev, "complete with size %u (expect %zu)", dump_size, hdev->dump.alloc_size); - dev_coredumpv(&hdev->dev, hdev->dump.head, dump_size, GFP_KERNEL); + hci_devcd_dump(hdev); } static void hci_devcd_handle_pkt_abort(struct hci_dev *hdev, @@ -275,8 +295,7 @@ static void hci_devcd_handle_pkt_abort(struct hci_dev *hdev, bt_dev_dbg(hdev, "aborted with size %u (expect %zu)", dump_size, hdev->dump.alloc_size); - /* Emit a devcoredump with the available data */ - dev_coredumpv(&hdev->dev, hdev->dump.head, dump_size, GFP_KERNEL); + hci_devcd_dump(hdev); } /* Bluetooth devcoredump state machine. 
@@ -391,8 +410,7 @@ void hci_devcd_timeout(struct work_struct *work) bt_dev_dbg(hdev, "timeout with size %u (expect %zu)", dump_size, hdev->dump.alloc_size); - /* Emit a devcoredump with the available data */ - dev_coredumpv(&hdev->dev, hdev->dump.head, dump_size, GFP_KERNEL); + hci_devcd_dump(hdev); hci_devcd_reset(hdev); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d097e308a755..95972fd4c784 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -27,6 +27,7 @@ #include <linux/export.h> #include <linux/debugfs.h> +#include <linux/errqueue.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -1002,6 +1003,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t } skb_queue_head_init(&conn->data_q); + skb_queue_head_init(&conn->tx_q.queue); INIT_LIST_HEAD(&conn->chan_list); INIT_LIST_HEAD(&conn->link_list); @@ -1155,6 +1157,7 @@ void hci_conn_del(struct hci_conn *conn) } skb_queue_purge(&conn->data_q); + skb_queue_purge(&conn->tx_q.queue); /* Remove the connection from the list and cleanup its remaining * state. This is a separate function since for some cases like @@ -3064,3 +3067,122 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) */ return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); } + +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, + const struct sockcm_cookie *sockc) +{ + struct sock *sk = skb ? skb->sk : NULL; + + /* This shall be called on a single skb of those generated by user + * sendmsg(), and only when the sendmsg() does not return error to + * user. This is required for keeping the tskey that increments here in + * sync with possible sendmsg() counting by user. + * + * Stream sockets shall set key_offset to sendmsg() length in bytes + * and call with the last fragment, others to 1 and first fragment. 
+ */ + + if (!skb || !sockc || !sk || !key_offset) + return; + + sock_tx_timestamp(sk, sockc, &skb_shinfo(skb)->tx_flags); + + if (sockc->tsflags & SOF_TIMESTAMPING_OPT_ID && + sockc->tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { + if (sockc->tsflags & SOCKCM_FLAG_TS_OPT_ID) { + skb_shinfo(skb)->tskey = sockc->ts_opt_id; + } else { + int key = atomic_add_return(key_offset, &sk->sk_tskey); + + skb_shinfo(skb)->tskey = key - 1; + } + } +} + +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) +{ + struct tx_queue *comp = &conn->tx_q; + bool track = false; + + /* Emit SND now, ie. just before sending to driver */ + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); + + /* COMPLETION tstamp is emitted for tracked skb later in Number of + * Completed Packets event. Available only for flow controlled cases. + * + * TODO: SCO support without flowctl (needs to be done in drivers) + */ + switch (conn->type) { + case ISO_LINK: + case ACL_LINK: + case LE_LINK: + break; + case SCO_LINK: + case ESCO_LINK: + if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) + return; + break; + default: + return; + } + + if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) + track = true; + + /* If nothing is tracked, just count extra skbs at the queue head */ + if (!track && !comp->tracked) { + comp->extra++; + return; + } + + if (track) { + skb = skb_clone_sk(skb); + if (!skb) + goto count_only; + + comp->tracked++; + } else { + skb = skb_clone(skb, GFP_KERNEL); + if (!skb) + goto count_only; + } + + skb_queue_tail(&comp->queue, skb); + return; + +count_only: + /* Stop tracking skbs, and only count. This will not emit timestamps for + * the packets, but if we get here something is more seriously wrong. 
+ */ + comp->tracked = 0; + comp->extra += skb_queue_len(&comp->queue) + 1; + skb_queue_purge(&comp->queue); +} + +void hci_conn_tx_dequeue(struct hci_conn *conn) +{ + struct tx_queue *comp = &conn->tx_q; + struct sk_buff *skb; + + /* If there are tracked skbs, the counted extra go before dequeuing real + * skbs, to keep ordering. When nothing is tracked, the ordering doesn't + * matter so dequeue real skbs first to get rid of them ASAP. + */ + if (comp->extra && (comp->tracked || skb_queue_empty(&comp->queue))) { + comp->extra--; + return; + } + + skb = skb_dequeue(&comp->queue); + if (!skb) + return; + + if (skb->sk) { + comp->tracked--; + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, + SCM_TSTAMP_COMPLETION); + } + + kfree_skb(skb); +} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 012fc107901a..5eb0600bbd03 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3029,6 +3029,13 @@ static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) return 0; } +static int hci_send_conn_frame(struct hci_dev *hdev, struct hci_conn *conn, + struct sk_buff *skb) +{ + hci_conn_tx_queue(conn, skb); + return hci_send_frame(hdev, skb); +} + /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param) @@ -3552,51 +3559,45 @@ static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type) } /* Schedule SCO */ -static void hci_sched_sco(struct hci_dev *hdev) +static void hci_sched_sco(struct hci_dev *hdev, __u8 type) { struct hci_conn *conn; struct sk_buff *skb; - int quote; + int quote, *cnt; + unsigned int pkts = hdev->sco_pkts; - BT_DBG("%s", hdev->name); + bt_dev_dbg(hdev, "type %u", type); - if (!hci_conn_num(hdev, SCO_LINK)) + if (!hci_conn_num(hdev, type) || !pkts) return; - while (hdev->sco_cnt && (conn = hci_low_sent(hdev, SCO_LINK, &quote))) { - while (quote-- && (skb = skb_dequeue(&conn->data_q))) { - BT_DBG("skb %p len %d", skb, skb->len); - hci_send_frame(hdev, skb); -
- conn->sent++; - if (conn->sent == ~0) - conn->sent = 0; - } - } -} - -static void hci_sched_esco(struct hci_dev *hdev) -{ - struct hci_conn *conn; - struct sk_buff *skb; - int quote; - - BT_DBG("%s", hdev->name); - - if (!hci_conn_num(hdev, ESCO_LINK)) - return; + /* Use sco_pkts if flow control has not been enabled which will limit + * the amount of buffer sent in a row. + */ + if (!hci_dev_test_flag(hdev, HCI_SCO_FLOWCTL)) + cnt = &pkts; + else + cnt = &hdev->sco_cnt; - while (hdev->sco_cnt && (conn = hci_low_sent(hdev, ESCO_LINK, - &quote))) { + while (*cnt && (conn = hci_low_sent(hdev, type, &quote))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); - hci_send_frame(hdev, skb); + hci_send_conn_frame(hdev, conn, skb); conn->sent++; if (conn->sent == ~0) conn->sent = 0; + (*cnt)--; } } + + /* Rescheduled if all packets were sent and flow control is not enabled + * as there could be more packets queued that could not be sent and + * since no HCI_EV_NUM_COMP_PKTS event will be generated the reschedule + * needs to be forced.
+ */ + if (!pkts && !hci_dev_test_flag(hdev, HCI_SCO_FLOWCTL)) + queue_work(hdev->workqueue, &hdev->tx_work); } static void hci_sched_acl_pkt(struct hci_dev *hdev) @@ -3624,7 +3625,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) hci_conn_enter_active_mode(chan->conn, bt_cb(skb)->force_active); - hci_send_frame(hdev, skb); + hci_send_conn_frame(hdev, chan->conn, skb); hdev->acl_last_tx = jiffies; hdev->acl_cnt--; @@ -3632,8 +3633,8 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) chan->conn->sent++; /* Send pending SCO packets right away */ - hci_sched_sco(hdev); - hci_sched_esco(hdev); + hci_sched_sco(hdev, SCO_LINK); + hci_sched_sco(hdev, ESCO_LINK); } } @@ -3680,7 +3681,7 @@ static void hci_sched_le(struct hci_dev *hdev) skb = skb_dequeue(&chan->data_q); - hci_send_frame(hdev, skb); + hci_send_conn_frame(hdev, chan->conn, skb); hdev->le_last_tx = jiffies; (*cnt)--; @@ -3688,8 +3689,8 @@ static void hci_sched_le(struct hci_dev *hdev) chan->conn->sent++; /* Send pending SCO packets right away */ - hci_sched_sco(hdev); - hci_sched_esco(hdev); + hci_sched_sco(hdev, SCO_LINK); + hci_sched_sco(hdev, ESCO_LINK); } } @@ -3714,7 +3715,7 @@ static void hci_sched_iso(struct hci_dev *hdev) while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, &quote))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); - hci_send_frame(hdev, skb); + hci_send_conn_frame(hdev, conn, skb); conn->sent++; if (conn->sent == ~0) @@ -3734,8 +3735,8 @@ static void hci_tx_work(struct work_struct *work) if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { /* Schedule queues and send stuff to HCI driver */ - hci_sched_sco(hdev); - hci_sched_esco(hdev); + hci_sched_sco(hdev, SCO_LINK); + hci_sched_sco(hdev, ESCO_LINK); hci_sched_iso(hdev); hci_sched_acl(hdev); hci_sched_le(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 903b0b52692a..1d8616f2e740 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@
-151,7 +151,7 @@ static u8 hci_cc_exit_periodic_inq(struct hci_dev *hdev, void *data, static u8 hci_cc_remote_name_req_cancel(struct hci_dev *hdev, void *data, struct sk_buff *skb) { - struct hci_ev_status *rp = data; + struct hci_rp_remote_name_req_cancel *rp = data; bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); @@ -930,6 +930,9 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data, hdev->sco_pkts = 8; } + if (!read_voice_setting_capable(hdev)) + hdev->sco_pkts = 0; + hdev->acl_cnt = hdev->acl_pkts; hdev->sco_cnt = hdev->sco_pkts; @@ -4012,8 +4015,8 @@ static const struct hci_cc { HCI_CC_STATUS(HCI_OP_INQUIRY_CANCEL, hci_cc_inquiry_cancel), HCI_CC_STATUS(HCI_OP_PERIODIC_INQ, hci_cc_periodic_inq), HCI_CC_STATUS(HCI_OP_EXIT_PERIODIC_INQ, hci_cc_exit_periodic_inq), - HCI_CC_STATUS(HCI_OP_REMOTE_NAME_REQ_CANCEL, - hci_cc_remote_name_req_cancel), + HCI_CC(HCI_OP_REMOTE_NAME_REQ_CANCEL, hci_cc_remote_name_req_cancel, + sizeof(struct hci_rp_remote_name_req_cancel)), HCI_CC(HCI_OP_ROLE_DISCOVERY, hci_cc_role_discovery, sizeof(struct hci_rp_role_discovery)), HCI_CC(HCI_OP_READ_LINK_POLICY, hci_cc_read_link_policy, @@ -4412,6 +4415,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, struct hci_comp_pkts_info *info = &ev->handles[i]; struct hci_conn *conn; __u16 handle, count; + unsigned int i; handle = __le16_to_cpu(info->handle); count = __le16_to_cpu(info->count); @@ -4422,6 +4426,9 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, conn->sent -= count; + for (i = 0; i < count; ++i) + hci_conn_tx_dequeue(conn); + switch (conn->type) { case ACL_LINK: hdev->acl_cnt += count; @@ -4442,9 +4449,11 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, break; case SCO_LINK: + case ESCO_LINK: hdev->sco_cnt += count; if (hdev->sco_cnt > hdev->sco_pkts) hdev->sco_cnt = hdev->sco_pkts; + break; case ISO_LINK: @@ -6051,8 +6060,17 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t 
*bdaddr, * a LE Direct Advertising Report event. In that case it is * important to see if the address is matching the local * controller address. + * + * If local privacy is not enable the controller shall not be + * generating such event since according to its documentation it is only + * valid for filter_policy 0x02 and 0x03, but the fact that it did + * generate LE Direct Advertising Report means it is probably broken and + * won't generate any other event which can potentially break + * auto-connect logic so in case local privacy is not enable this + * ignores the direct_addr so it works as a regular report. */ - if (!hci_dev_test_flag(hdev, HCI_MESH) && direct_addr) { + if (!hci_dev_test_flag(hdev, HCI_MESH) && direct_addr && + hci_dev_test_flag(hdev, HCI_PRIVACY)) { direct_addr_type = ev_bdaddr_type(hdev, direct_addr_type, &bdaddr_resolved); @@ -6062,12 +6080,6 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, if (!hci_bdaddr_is_rpa(direct_addr, direct_addr_type)) return; - /* If the controller is not using resolvable random - * addresses, then this report can be ignored. - */ - if (!hci_dev_test_flag(hdev, HCI_PRIVACY)) - return; - /* If the local IRK of the controller does not match * with the resolvable random address provided, then * this report can be ignored. 
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index dd770ef5ec36..609b035e5c90 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1910,7 +1910,7 @@ int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, hdev->adv_instance_timeout = timeout; queue_delayed_work(hdev->req_workqueue, &hdev->adv_instance_expire, - msecs_to_jiffies(timeout * 1000)); + secs_to_jiffies(timeout)); } /* If we're just re-scheduling the same instance again then do not @@ -3696,6 +3696,9 @@ static int hci_read_local_name_sync(struct hci_dev *hdev) /* Read Voice Setting */ static int hci_read_voice_setting_sync(struct hci_dev *hdev) { + if (!read_voice_setting_capable(hdev)) + return 0; + return __hci_cmd_sync_status(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL, HCI_CMD_TIMEOUT); } @@ -3766,6 +3769,28 @@ static int hci_write_ca_timeout_sync(struct hci_dev *hdev) sizeof(param), ¶m, HCI_CMD_TIMEOUT); } +/* Enable SCO flow control if supported */ +static int hci_write_sync_flowctl_sync(struct hci_dev *hdev) +{ + struct hci_cp_write_sync_flowctl cp; + int err; + + /* Check if the controller supports SCO and HCI_OP_WRITE_SYNC_FLOWCTL */ + if (!lmp_sco_capable(hdev) || !(hdev->commands[10] & BIT(4)) || + !test_bit(HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED, &hdev->quirks)) + return 0; + + memset(&cp, 0, sizeof(cp)); + cp.enable = 0x01; + + err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SYNC_FLOWCTL, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); + if (!err) + hci_dev_set_flag(hdev, HCI_SCO_FLOWCTL); + + return err; +} + /* BR Controller init stage 2 command sequence */ static const struct hci_init_stage br_init2[] = { /* HCI_OP_READ_BUFFER_SIZE */ @@ -3784,6 +3809,8 @@ static const struct hci_init_stage br_init2[] = { HCI_INIT(hci_clear_event_filter_sync), /* HCI_OP_WRITE_CA_TIMEOUT */ HCI_INIT(hci_write_ca_timeout_sync), + /* HCI_OP_WRITE_SYNC_FLOWCTL */ + HCI_INIT(hci_write_sync_flowctl_sync), {} }; @@ -4129,7 +4156,8 @@ static int hci_read_page_scan_type_sync(struct 
hci_dev *hdev) * support the Read Page Scan Type command. Check support for * this command in the bit mask of supported commands. */ - if (!(hdev->commands[13] & 0x01)) + if (!(hdev->commands[13] & 0x01) || + test_bit(HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_TYPE, diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 0cb52a3308ba..3501a991f1c6 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -518,7 +518,8 @@ static struct bt_iso_qos *iso_sock_get_qos(struct sock *sk) return &iso_pi(sk)->qos; } -static int iso_send_frame(struct sock *sk, struct sk_buff *skb) +static int iso_send_frame(struct sock *sk, struct sk_buff *skb, + const struct sockcm_cookie *sockc) { struct iso_conn *conn = iso_pi(sk)->conn; struct bt_iso_qos *qos = iso_sock_get_qos(sk); @@ -538,10 +539,12 @@ static int iso_send_frame(struct sock *sk, struct sk_buff *skb) hdr->slen = cpu_to_le16(hci_iso_data_len_pack(len, HCI_ISO_STATUS_VALID)); - if (sk->sk_state == BT_CONNECTED) + if (sk->sk_state == BT_CONNECTED) { + hci_setup_tx_timestamp(skb, 1, sockc); hci_send_iso(conn->hcon, skb); - else + } else { len = -ENOTCONN; + } return len; } @@ -1348,6 +1351,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct sk_buff *skb, **frag; + struct sockcm_cookie sockc; size_t mtu; int err; @@ -1360,6 +1364,14 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + hci_sockcm_init(&sockc, sk); + + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (err) + return err; + } + lock_sock(sk); if (sk->sk_state != BT_CONNECTED) { @@ -1405,7 +1417,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); if (sk->sk_state == BT_CONNECTED) - err = iso_send_frame(sk, skb); + err = iso_send_frame(sk, skb, &sockc); else err = -ENOTCONN; @@ -1474,6 
+1486,10 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, BT_DBG("sk %p", sk); + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_BLUETOOTH, + BT_SCM_ERROR); + if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { sock_hold(sk); lock_sock(sk); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c27ea70f71e1..c7b66b2ea9f2 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -282,7 +282,7 @@ static void __set_retrans_timer(struct l2cap_chan *chan) if (!delayed_work_pending(&chan->monitor_timer) && chan->retrans_timeout) { l2cap_set_timer(chan, &chan->retrans_timer, - msecs_to_jiffies(chan->retrans_timeout)); + secs_to_jiffies(chan->retrans_timeout)); } } @@ -291,7 +291,7 @@ static void __set_monitor_timer(struct l2cap_chan *chan) __clear_retrans_timer(chan); if (chan->monitor_timeout) { l2cap_set_timer(chan, &chan->monitor_timer, - msecs_to_jiffies(chan->monitor_timeout)); + secs_to_jiffies(chan->monitor_timeout)); } } @@ -2515,7 +2515,33 @@ static void l2cap_le_flowctl_send(struct l2cap_chan *chan) skb_queue_len(&chan->tx_q)); } -int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) +static void l2cap_tx_timestamp(struct sk_buff *skb, + const struct sockcm_cookie *sockc, + size_t len) +{ + struct sock *sk = skb ? skb->sk : NULL; + + if (sk && sk->sk_type == SOCK_STREAM) + hci_setup_tx_timestamp(skb, len, sockc); + else + hci_setup_tx_timestamp(skb, 1, sockc); +} + +static void l2cap_tx_timestamp_seg(struct sk_buff_head *queue, + const struct sockcm_cookie *sockc, + size_t len) +{ + struct sk_buff *skb = skb_peek(queue); + struct sock *sk = skb ? 
skb->sk : NULL; + + if (sk && sk->sk_type == SOCK_STREAM) + l2cap_tx_timestamp(skb_peek_tail(queue), sockc, len); + else + l2cap_tx_timestamp(skb, sockc, len); +} + +int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, + const struct sockcm_cookie *sockc) { struct sk_buff *skb; int err; @@ -2530,6 +2556,8 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) if (IS_ERR(skb)) return PTR_ERR(skb); + l2cap_tx_timestamp(skb, sockc, len); + l2cap_do_send(chan, skb); return len; } @@ -2553,6 +2581,8 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) if (err) return err; + l2cap_tx_timestamp_seg(&seg_queue, sockc, len); + skb_queue_splice_tail_init(&seg_queue, &chan->tx_q); l2cap_le_flowctl_send(chan); @@ -2574,6 +2604,8 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) if (IS_ERR(skb)) return PTR_ERR(skb); + l2cap_tx_timestamp(skb, sockc, len); + l2cap_do_send(chan, skb); err = len; break; @@ -2597,10 +2629,13 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) if (err) break; - if (chan->mode == L2CAP_MODE_ERTM) + if (chan->mode == L2CAP_MODE_ERTM) { + /* TODO: ERTM mode timestamping */ l2cap_tx(chan, NULL, &seg_queue, L2CAP_EV_DATA_REQUEST); - else + } else { + l2cap_tx_timestamp_seg(&seg_queue, sockc, len); l2cap_streaming_send(chan, &seg_queue); + } err = len; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index acd11b268b98..5aa55fa69594 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1106,6 +1106,7 @@ static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; + struct sockcm_cookie sockc; int err; BT_DBG("sock %p, sk %p", sock, sk); @@ -1120,6 +1121,14 @@ static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (sk->sk_state != BT_CONNECTED) return -ENOTCONN; + 
hci_sockcm_init(&sockc, sk); + + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (err) + return err; + } + lock_sock(sk); err = bt_sock_wait_ready(sk, msg->msg_flags); release_sock(sk); @@ -1127,7 +1136,7 @@ static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg, return err; l2cap_chan_lock(chan); - err = l2cap_chan_send(chan, msg, len); + err = l2cap_chan_send(chan, msg, len, &sockc); l2cap_chan_unlock(chan); return err; @@ -1168,6 +1177,10 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, struct l2cap_pinfo *pi = l2cap_pi(sk); int err; + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_BLUETOOTH, + BT_SCM_ERROR); + lock_sock(sk); if (sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 621c555f639b..c1e1e529e26c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -851,6 +851,9 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (cis_peripheral_capable(hdev)) settings |= MGMT_SETTING_CIS_PERIPHERAL; + if (ll_privacy_capable(hdev)) + settings |= MGMT_SETTING_LL_PRIVACY; + settings |= MGMT_SETTING_PHY_CONFIGURATION; return settings; @@ -933,6 +936,9 @@ static u32 get_current_settings(struct hci_dev *hdev) if (sync_recv_capable(hdev)) settings |= MGMT_SETTING_ISO_SYNC_RECEIVER; + if (ll_privacy_capable(hdev)) + settings |= MGMT_SETTING_LL_PRIVACY; + return settings; } @@ -1533,7 +1539,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && hdev->discov_timeout > 0) { - int to = msecs_to_jiffies(hdev->discov_timeout * 1000); + int to = secs_to_jiffies(hdev->discov_timeout); queue_delayed_work(hdev->req_workqueue, &hdev->discov_off, to); } @@ -1641,7 +1647,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, hdev->discov_timeout = timeout; if (cp->val && hdev->discov_timeout > 0) 
{ - int to = msecs_to_jiffies(hdev->discov_timeout * 1000); + int to = secs_to_jiffies(hdev->discov_timeout); queue_delayed_work(hdev->req_workqueue, &hdev->discov_off, to); } @@ -2534,7 +2540,7 @@ static int send_hci_cmd_sync(struct hci_dev *hdev, void *data) skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cp->opcode), le16_to_cpu(cp->params_len), cp->params, cp->event, cp->timeout ? - msecs_to_jiffies(cp->timeout * 1000) : + secs_to_jiffies(cp->timeout) : HCI_CMD_TIMEOUT); if (IS_ERR(skb)) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_HCI_CMD_SYNC, @@ -5743,29 +5749,6 @@ done: return err; } -void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) -{ - struct mgmt_pending_cmd *cmd; - - bt_dev_dbg(hdev, "status %u", status); - - hci_dev_lock(hdev); - - cmd = pending_find(MGMT_OP_START_DISCOVERY, hdev); - if (!cmd) - cmd = pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev); - - if (!cmd) - cmd = pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev); - - if (cmd) { - cmd->cmd_complete(cmd, mgmt_status(status)); - mgmt_pending_remove(cmd); - } - - hci_dev_unlock(hdev); -} - static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, uint8_t *mgmt_status) { @@ -6018,23 +6001,6 @@ failed: return err; } -void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) -{ - struct mgmt_pending_cmd *cmd; - - bt_dev_dbg(hdev, "status %u", status); - - hci_dev_lock(hdev); - - cmd = pending_find(MGMT_OP_STOP_DISCOVERY, hdev); - if (cmd) { - cmd->cmd_complete(cmd, mgmt_status(status)); - mgmt_pending_remove(cmd); - } - - hci_dev_unlock(hdev); -} - static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 17ab909a7c07..e5ff65e424b5 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -229,23 +229,6 @@ struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, return NULL; } -struct 
mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel, - u16 opcode, - struct hci_dev *hdev, - const void *data) -{ - struct mgmt_pending_cmd *cmd; - - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { - if (cmd->user_data != data) - continue; - if (cmd->opcode == opcode) - return cmd; - } - - return NULL; -} - void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data) diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h index bdf978605d5a..f2ba994ab1d8 100644 --- a/net/bluetooth/mgmt_util.h +++ b/net/bluetooth/mgmt_util.h @@ -54,10 +54,6 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev); -struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel, - u16 opcode, - struct hci_dev *hdev, - const void *data); void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 5d1bc0d6aee0..2945d27e75dc 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -378,7 +378,8 @@ unlock: return err; } -static int sco_send_frame(struct sock *sk, struct sk_buff *skb) +static int sco_send_frame(struct sock *sk, struct sk_buff *skb, + const struct sockcm_cookie *sockc) { struct sco_conn *conn = sco_pi(sk)->conn; int len = skb->len; @@ -389,6 +390,7 @@ static int sco_send_frame(struct sock *sk, struct sk_buff *skb) BT_DBG("sk %p len %d", sk, len); + hci_setup_tx_timestamp(skb, 1, sockc); hci_send_sco(conn->hcon, skb); return len; @@ -784,6 +786,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct sk_buff *skb; + struct sockcm_cookie sockc; int err; BT_DBG("sock %p, sk %p", sock, sk); @@ -795,6 +798,14 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, 
if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + hci_sockcm_init(&sockc, sk); + + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (err) + return err; + } + skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -802,7 +813,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); if (sk->sk_state == BT_CONNECTED) - err = sco_send_frame(sk, skb); + err = sco_send_frame(sk, skb, &sockc); else err = -ENOTCONN; @@ -868,6 +879,10 @@ static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; struct sco_pinfo *pi = sco_pi(sk); + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_BLUETOOTH, + BT_SCM_ERROR); + lock_sock(sk); if (sk->sk_state == BT_CONNECT2 && diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 8b9724fd752a..47f359f24d1f 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -55,7 +55,7 @@ /* Keys which are not distributed with Secure Connections */ #define SMP_SC_NO_DIST (SMP_DIST_ENC_KEY | SMP_DIST_LINK_KEY) -#define SMP_TIMEOUT msecs_to_jiffies(30000) +#define SMP_TIMEOUT secs_to_jiffies(30) #define ID_ADDR_TIMEOUT msecs_to_jiffies(200) @@ -608,7 +608,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data) iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iv, 2, 1 + len); - l2cap_chan_send(chan, &msg, 1 + len); + l2cap_chan_send(chan, &msg, 1 + len, NULL); if (!chan->data) return; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0ab4613aa07a..a818fdc22da9 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -16,6 +16,8 @@ #include <linux/netfilter_bridge.h> #include <linux/uaccess.h> +#include <net/netdev_lock.h> + #include "br_private.h" #define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ @@ -488,7 +490,7 @@ void br_dev_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &br_type); dev->priv_flags = 
IFF_EBRIDGE | IFF_NO_QUEUE; dev->lltx = true; - dev->netns_local = true; + dev->netns_immutable = true; dev->features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index f213ed108361..6bc0a11f2ed3 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -394,10 +394,26 @@ static int old_deviceless(struct net *net, void __user *data) return -EOPNOTSUPP; } -int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, - struct ifreq *ifr, void __user *uarg) +int br_ioctl_stub(struct net *net, unsigned int cmd, void __user *uarg) { int ret = -EOPNOTSUPP; + struct ifreq ifr; + + if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF) { + void __user *data; + char *colon; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (get_user_ifreq(&ifr, &data, uarg)) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ - 1] = 0; + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; + } rtnl_lock(); @@ -430,7 +446,21 @@ int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, break; case SIOCBRADDIF: case SIOCBRDELIF: - ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF); + { + struct net_device *dev; + + dev = __dev_get_by_name(net, ifr.ifr_name); + if (!dev || !netif_device_present(dev)) { + ret = -ENODEV; + break; + } + if (!netif_is_bridge_master(dev)) { + ret = -EOPNOTSUPP; + break; + } + + ret = add_del_if(netdev_priv(dev), ifr.ifr_ifindex, cmd == SIOCBRADDIF); + } break; } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 1a52a0bca086..7e1ad229e133 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1040,7 +1040,7 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg, /* host join */ if (!port) { - if (mp->host_joined) { + if (mp->host_joined && !(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host"); return -EEXIST; } diff --git a/net/bridge/br_netlink.c 
b/net/bridge/br_netlink.c index 3e0f47203f2a..6e337937d0d7 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1553,11 +1553,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return 0; } -static int br_dev_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int br_dev_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(dev); + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; int err; err = register_netdevice(dev); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1054b8a88edc..d5b3c5936a79 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -949,8 +949,7 @@ br_port_get_check_rtnl(const struct net_device *dev) /* br_ioctl.c */ int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd); -int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, - struct ifreq *ifr, void __user *uarg); +int br_ioctl_stub(struct net *net, unsigned int cmd, void __user *uarg); /* br_multicast.c */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 94ad09e36df2..fa6a3c2634a8 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -438,10 +438,11 @@ static void caif_netlink_parms(struct nlattr *data[], } } -static int ipcaif_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int ipcaif_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct nlattr **data = params->data; int ret; struct chnl_net *caifdev; ASSERT_RTNL(); diff --git a/net/can/af_can.c b/net/can/af_can.c index 65230e81fa08..4c059e41c831 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -172,6 +172,8 @@ static int can_create(struct net *net, struct socket 
*sock, int protocol, sock_orphan(sk); sock_put(sk); sock->sk = NULL; + } else { + sock_prot_inuse_add(net, sk->sk_prot, 1); } errout: diff --git a/net/can/bcm.c b/net/can/bcm.c index 217049fa496e..0bca1b9b3f70 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1011,13 +1011,12 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - op->timer.function = bcm_tx_timeout_handler; + hrtimer_setup(&op->timer, bcm_tx_timeout_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); /* currently unused in tx_ops */ - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&op->thrtimer, hrtimer_dummy_timeout, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the tx_ops */ list_add(&op->list, &bo->tx_ops); @@ -1192,13 +1191,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->rx_ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - op->timer.function = bcm_rx_timeout_handler; - - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - op->thrtimer.function = bcm_rx_thr_handler; + hrtimer_setup(&op->timer, bcm_rx_timeout_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&op->thrtimer, bcm_rx_thr_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); @@ -1625,6 +1621,7 @@ static int bcm_release(struct socket *sock) sock->sk = NULL; release_sock(sk); + sock_prot_inuse_add(net, sk->sk_prot, -1); sock_put(sk); return 0; diff --git a/net/can/isotp.c b/net/can/isotp.c index 16046931542a..1efa377f002e 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1239,6 +1239,7 @@ static int isotp_release(struct socket *sock) sock->sk = NULL; 
release_sock(sk); + sock_prot_inuse_add(net, sk->sk_prot, -1); sock_put(sk); return 0; @@ -1634,12 +1635,10 @@ static int isotp_init(struct sock *sk) so->rx.buflen = ARRAY_SIZE(so->rx.sbuf); so->tx.buflen = ARRAY_SIZE(so->tx.sbuf); - hrtimer_init(&so->rxtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - so->rxtimer.function = isotp_rx_timer_handler; - hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - so->txtimer.function = isotp_tx_timer_handler; - hrtimer_init(&so->txfrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - so->txfrtimer.function = isotp_txfr_timer_handler; + hrtimer_setup(&so->rxtimer, isotp_rx_timer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&so->txtimer, isotp_tx_timer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&so->txfrtimer, isotp_txfr_timer_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); init_waitqueue_head(&so->wait); spin_lock_init(&so->rx_lock); diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c index 486687901602..39844f14eed8 100644 --- a/net/can/j1939/bus.c +++ b/net/can/j1939/bus.c @@ -158,8 +158,8 @@ struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name) ecu->addr = J1939_IDLE_ADDR; ecu->name = name; - hrtimer_init(&ecu->ac_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - ecu->ac_timer.function = j1939_ecu_timer_handler; + hrtimer_setup(&ecu->ac_timer, j1939_ecu_timer_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); INIT_LIST_HEAD(&ecu->list); j1939_priv_get(priv); diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 9b72d118d756..fbf5c8001c9d 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1511,12 +1511,8 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv, skcb = j1939_skb_to_cb(skb); memcpy(&session->skcb, skcb, sizeof(session->skcb)); - hrtimer_init(&session->txtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - session->txtimer.function = j1939_tp_txtimer; - 
hrtimer_init(&session->rxtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - session->rxtimer.function = j1939_tp_rxtimer; + hrtimer_setup(&session->txtimer, j1939_tp_txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&session->rxtimer, j1939_tp_rxtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n", __func__, session, skcb->addr.sa, skcb->addr.da); diff --git a/net/can/raw.c b/net/can/raw.c index 46e8ed9d64da..020f21430b1d 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -397,11 +397,13 @@ static int raw_release(struct socket *sock) { struct sock *sk = sock->sk; struct raw_sock *ro; + struct net *net; if (!sk) return 0; ro = raw_sk(sk); + net = sock_net(sk); spin_lock(&raw_notifier_lock); while (raw_busy_notifier == ro) { @@ -421,7 +423,7 @@ static int raw_release(struct socket *sock) raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); netdev_put(ro->dev, &ro->dev_tracker); } else { - raw_disable_allfilters(sock_net(sk), NULL, sk); + raw_disable_allfilters(net, NULL, sk); } } @@ -440,6 +442,7 @@ static int raw_release(struct socket *sock) release_sock(sk); rtnl_unlock(); + sock_prot_inuse_add(net, sk->sk_prot, -1); sock_put(sk); return 0; @@ -963,7 +966,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) skb->dev = dev; skb->priority = sockc.priority; - skb->mark = READ_ONCE(sk->sk_mark); + skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; skb_setup_tx_timestamp(skb, &sockc); diff --git a/net/core/Makefile b/net/core/Makefile index d9326600e289..a10c3bd96798 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ +obj-y += dev.o dev_api.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ 
fib_notifier.o xdp.o flow_offload.o gro.o \ diff --git a/net/core/dev.c b/net/core/dev.c index 2f7f5fd9ffec..be17e0660144 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -156,9 +156,11 @@ #include <linux/pm_runtime.h> #include <linux/prandom.h> #include <linux/once_lite.h> +#include <net/netdev_lock.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/types.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <net/rps.h> #include <linux/phy_link_topology.h> @@ -570,10 +572,18 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) static inline struct list_head *ptype_head(const struct packet_type *pt) { - if (pt->type == htons(ETH_P_ALL)) - return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all; - else - return pt->dev ? &pt->dev->ptype_specific : + if (pt->type == htons(ETH_P_ALL)) { + if (!pt->af_packet_net && !pt->dev) + return NULL; + + return pt->dev ? &pt->dev->ptype_all : + &pt->af_packet_net->ptype_all; + } + + if (pt->dev) + return &pt->dev->ptype_specific; + + return pt->af_packet_net ? &pt->af_packet_net->ptype_specific : &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; } @@ -594,6 +604,9 @@ void dev_add_pack(struct packet_type *pt) { struct list_head *head = ptype_head(pt); + if (WARN_ON_ONCE(!head)) + return; + spin_lock(&ptype_lock); list_add_rcu(&pt->list, head); spin_unlock(&ptype_lock); @@ -618,6 +631,9 @@ void __dev_remove_pack(struct packet_type *pt) struct list_head *head = ptype_head(pt); struct packet_type *pt1; + if (!head) + return; + spin_lock(&ptype_lock); list_for_each_entry(pt1, head, list) { @@ -1007,7 +1023,7 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id) WARN_ON_ONCE(!rcu_read_lock_held()); - if (napi_id < MIN_NAPI_ID) + if (!napi_id_valid(napi_id)) return NULL; napi = napi_by_id(napi_id); @@ -1370,15 +1386,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev, return ret < 0 ? 
ret : 0; } -/** - * dev_change_name - change name of a device - * @dev: device - * @newname: name (or format string) must be at least IFNAMSIZ - * - * Change name of a device, can pass format strings "eth%d". - * for wildcarding. - */ -int dev_change_name(struct net_device *dev, const char *newname) +int netif_change_name(struct net_device *dev, const char *newname) { struct net *net = dev_net(dev); unsigned char old_assign_type; @@ -1448,15 +1456,7 @@ rollback: return err; } -/** - * dev_set_alias - change ifalias of a device - * @dev: device - * @alias: name up to IFALIASZ - * @len: limit of bytes to copy from info - * - * Set ifalias for a device, - */ -int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +int netif_set_alias(struct net_device *dev, const char *alias, size_t len) { struct dev_ifalias *new_alias = NULL; @@ -1482,7 +1482,6 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return len; } -EXPORT_SYMBOL(dev_set_alias); /** * dev_get_alias - get ifalias of a device @@ -1628,6 +1627,8 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) set_bit(__LINK_STATE_START, &dev->state); + netdev_ops_assert_locked(dev); + if (ops->ndo_validate_addr) ret = ops->ndo_validate_addr(dev); @@ -1648,20 +1649,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) return ret; } -/** - * dev_open - prepare an interface for use. - * @dev: device to open - * @extack: netlink extended ack - * - * Takes a device from down to up state. The device's private open - * function is invoked and then the multicast lists are loaded. Finally - * the device is moved into the up state and a %NETDEV_UP message is - * sent to the netdev notifier chain. - * - * Calling this function on an active interface is a nop. On a failure - * a negative errno code is returned. 
- */ -int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) +int netif_open(struct net_device *dev, struct netlink_ext_ack *extack) { int ret; @@ -1677,7 +1665,6 @@ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) return ret; } -EXPORT_SYMBOL(dev_open); static void __dev_close_many(struct list_head *head) { @@ -1715,6 +1702,9 @@ static void __dev_close_many(struct list_head *head) * We allow it to be called even after a DETACH hot-plug * event. */ + + netdev_ops_assert_locked(dev); + if (ops->ndo_stop) ops->ndo_stop(dev); @@ -1752,16 +1742,7 @@ void dev_close_many(struct list_head *head, bool unlink) } EXPORT_SYMBOL(dev_close_many); -/** - * dev_close - shutdown an interface. - * @dev: device to shutdown - * - * This function moves an active device into down state. A - * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device - * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier - * chain. - */ -void dev_close(struct net_device *dev) +void netif_close(struct net_device *dev) { if (dev->flags & IFF_UP) { LIST_HEAD(single); @@ -1771,18 +1752,9 @@ void dev_close(struct net_device *dev) list_del(&single); } } -EXPORT_SYMBOL(dev_close); +EXPORT_SYMBOL(netif_close); - -/** - * dev_disable_lro - disable Large Receive Offload on a device - * @dev: device - * - * Disable Large Receive Offload (LRO) on a net device. Must be - * called under RTNL. This is needed if received packets may be - * forwarded to another interface. 
- */ -void dev_disable_lro(struct net_device *dev) +void netif_disable_lro(struct net_device *dev) { struct net_device *lower_dev; struct list_head *iter; @@ -1793,10 +1765,12 @@ void dev_disable_lro(struct net_device *dev) if (unlikely(dev->features & NETIF_F_LRO)) netdev_WARN(dev, "failed to disable LRO!\n"); - netdev_for_each_lower_dev(dev, lower_dev, iter) - dev_disable_lro(lower_dev); + netdev_for_each_lower_dev(dev, lower_dev, iter) { + netdev_lock_ops(lower_dev); + netif_disable_lro(lower_dev); + netdev_unlock_ops(lower_dev); + } } -EXPORT_SYMBOL(dev_disable_lro); /** * dev_disable_gro_hw - disable HW Generic Receive Offload on a device @@ -2481,16 +2455,21 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) } /** - * dev_nit_active - return true if any network interface taps are in use + * dev_nit_active_rcu - return true if any network interface taps are in use + * + * The caller must hold the RCU lock * * @dev: network device to check for the presence of taps */ -bool dev_nit_active(struct net_device *dev) +bool dev_nit_active_rcu(const struct net_device *dev) { - return !list_empty(&net_hotdata.ptype_all) || + /* Callers may hold either RCU or RCU BH lock */ + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + + return !list_empty(&dev_net(dev)->ptype_all) || !list_empty(&dev->ptype_all); } -EXPORT_SYMBOL_GPL(dev_nit_active); +EXPORT_SYMBOL_GPL(dev_nit_active_rcu); /* * Support routine. 
Sends outgoing frames to any network @@ -2499,11 +2478,12 @@ EXPORT_SYMBOL_GPL(dev_nit_active); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { - struct list_head *ptype_list = &net_hotdata.ptype_all; struct packet_type *ptype, *pt_prev = NULL; + struct list_head *ptype_list; struct sk_buff *skb2 = NULL; rcu_read_lock(); + ptype_list = &dev_net_rcu(dev)->ptype_all; again: list_for_each_entry_rcu(ptype, ptype_list, list) { if (READ_ONCE(ptype->ignore_outgoing)) @@ -2547,7 +2527,7 @@ again: pt_prev = ptype; } - if (ptype_list == &net_hotdata.ptype_all) { + if (ptype_list != &dev->ptype_all) { ptype_list = &dev->ptype_all; goto again; } @@ -3150,6 +3130,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->reg_state == NETREG_REGISTERED || dev->reg_state == NETREG_UNREGISTERING) { ASSERT_RTNL(); + netdev_ops_assert_locked(dev); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, txq); @@ -3180,7 +3161,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) } EXPORT_SYMBOL(netif_set_real_num_tx_queues); -#ifdef CONFIG_SYSFS /** * netif_set_real_num_rx_queues - set actual number of RX queues used * @dev: Network device @@ -3200,6 +3180,7 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) if (dev->reg_state == NETREG_REGISTERED) { ASSERT_RTNL(); + netdev_ops_assert_locked(dev); rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, rxq); @@ -3211,7 +3192,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) return 0; } EXPORT_SYMBOL(netif_set_real_num_rx_queues); -#endif /** * netif_set_real_num_queues - set actual number of RX and TX queues used @@ -3792,7 +3772,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int len; int rc; - if (dev_nit_active(dev)) + if (dev_nit_active_rcu(dev)) dev_queue_xmit_nit(skb, dev); len = skb->len; @@ -4568,7 +4548,8 @@ int __dev_queue_xmit(struct sk_buff 
*skb, struct net_device *sb_dev) skb_reset_mac_header(skb); skb_assert_len(skb); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) + if (unlikely(skb_shinfo(skb)->tx_flags & + (SKBTX_SCHED_TSTAMP | SKBTX_BPF))) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); /* Disable soft irqs for various locks below. Also @@ -4770,6 +4751,11 @@ EXPORT_SYMBOL(rps_needed); struct static_key_false rfs_needed __read_mostly; EXPORT_SYMBOL(rfs_needed); +static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table) +{ + return hash_32(hash, flow_table->log); +} + static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) @@ -4796,7 +4782,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = skb_get_hash(skb) & flow_table->mask; + flow_id = rfs_slot(skb_get_hash(skb), flow_table); rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) @@ -4875,7 +4861,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ - rflow = &flow_table->flows[hash & flow_table->mask]; + rflow = &flow_table->flows[rfs_slot(hash, flow_table)]; tcpu = rflow->cpu; /* @@ -4942,13 +4928,13 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); - if (flow_table && flow_id <= flow_table->mask) { + if (flow_table && flow_id < (1UL << flow_table->log)) { rflow = &flow_table->flows[flow_id]; cpu = READ_ONCE(rflow->cpu); if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids && ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) - READ_ONCE(rflow->last_qtail)) < - (int)(10 * flow_table->mask))) + (int)(10 << flow_table->log))) expire = false; } rcu_read_unlock(); @@ -5735,7 
+5721,8 @@ another_round: if (pfmemalloc) goto skip_taps; - list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) { + list_for_each_entry_rcu(ptype, &dev_net_rcu(skb->dev)->ptype_all, + list) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -5847,6 +5834,14 @@ check_vlan_id: deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, &ptype_base[ntohs(type) & PTYPE_HASH_MASK]); + + /* orig_dev and skb->dev could belong to different netns; + * Even in such case we need to traverse only the list + * coming from skb->dev, as the ptype owner (packet socket) + * will use dev_net(skb->dev) to do namespace filtering. + */ + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &dev_net_rcu(skb->dev)->ptype_specific); } deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, @@ -6057,7 +6052,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) static_branch_dec(&generic_xdp_needed_key); } else if (new && !old) { static_branch_inc(&generic_xdp_needed_key); - dev_disable_lro(dev); + netif_disable_lro(dev); dev_disable_gro_hw(dev); } break; @@ -6187,16 +6182,18 @@ EXPORT_SYMBOL(netif_receive_skb_list); static void flush_backlog(struct work_struct *work) { struct sk_buff *skb, *tmp; + struct sk_buff_head list; struct softnet_data *sd; + __skb_queue_head_init(&list); local_bh_disable(); sd = this_cpu_ptr(&softnet_data); backlog_lock_irq_disable(sd); skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { - if (skb->dev->reg_state == NETREG_UNREGISTERING) { + if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - dev_kfree_skb_irq(skb); + __skb_queue_tail(&list, skb); rps_input_queue_head_incr(sd); } } @@ -6204,14 +6201,16 @@ static void flush_backlog(struct work_struct *work) local_lock_nested_bh(&softnet_data.process_queue_bh_lock); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { - if (skb->dev->reg_state == NETREG_UNREGISTERING) { + if 
(READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); - kfree_skb(skb); + __skb_queue_tail(&list, skb); rps_input_queue_head_incr(sd); } } local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); local_bh_enable(); + + __skb_queue_purge_reason(&list, SKB_DROP_REASON_DEV_READY); } static bool flush_required(int cpu) @@ -6475,7 +6474,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) return false; if (work_done) { - if (n->gro_bitmask) + if (n->gro.bitmask) timeout = napi_get_gro_flush_timeout(n); n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n); } @@ -6485,15 +6484,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done) if (timeout) ret = false; } - if (n->gro_bitmask) { - /* When the NAPI instance uses a timeout and keeps postponing - * it, we need to bound somehow the time packets are kept in - * the GRO layer - */ - napi_gro_flush(n, !!timeout); - } - gro_normal_list(n); + /* + * When the NAPI instance uses a timeout and keeps postponing + * it, we need to bound somehow the time packets are kept in + * the GRO layer. + */ + gro_flush(&n->gro, !!timeout); + gro_normal_list(&n->gro); if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */ @@ -6557,19 +6555,15 @@ static void skb_defer_free_flush(struct softnet_data *sd) static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) { if (!skip_schedule) { - gro_normal_list(napi); + gro_normal_list(&napi->gro); __napi_schedule(napi); return; } - if (napi->gro_bitmask) { - /* flush too old packets - * If HZ < 1000, flush all packets. - */ - napi_gro_flush(napi, HZ >= 1000); - } + /* Flush too old packets. 
If HZ < 1000, flush all packets */ + gro_flush(&napi->gro, HZ >= 1000); + gro_normal_list(&napi->gro); - gro_normal_list(napi); clear_bit(NAPI_STATE_SCHED, &napi->state); } @@ -6676,7 +6670,7 @@ restart: } work = napi_poll(napi, budget); trace_napi_poll(napi, work, budget); - gro_normal_list(napi); + gro_normal_list(&napi->gro); count: if (work > 0) __NET_ADD_STATS(dev_net(napi->dev), @@ -6776,6 +6770,8 @@ void napi_resume_irqs(unsigned int napi_id) static void __napi_hash_add_with_id(struct napi_struct *napi, unsigned int napi_id) { + napi->gro.cached_napi_id = napi_id; + WRITE_ONCE(napi->napi_id, napi_id); hlist_add_head_rcu(&napi->napi_hash_node, &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); @@ -6803,7 +6799,7 @@ static void napi_hash_add(struct napi_struct *napi) /* 0..NR_CPUS range is reserved for sender_cpu use */ do { - if (unlikely(++napi_gen_id < MIN_NAPI_ID)) + if (unlikely(!napi_id_valid(++napi_gen_id))) napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); @@ -6844,17 +6840,6 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void init_gro_hash(struct napi_struct *napi) -{ - int i; - - for (i = 0; i < GRO_HASH_BUCKETS; i++) { - INIT_LIST_HEAD(&napi->gro_hash[i].list); - napi->gro_hash[i].count = 0; - } - napi->gro_bitmask = 0; -} - int dev_set_threaded(struct net_device *dev, bool threaded) { struct napi_struct *napi; @@ -6916,8 +6901,7 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, if (WARN_ON_ONCE(napi && !napi->dev)) return; - if (dev->reg_state >= NETREG_REGISTERED) - ASSERT_RTNL(); + netdev_ops_assert_locked_or_invisible(dev); switch (type) { case NETDEV_QUEUE_TYPE_RX: @@ -6934,11 +6918,175 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, } EXPORT_SYMBOL(netif_queue_set_napi); +static void +netif_napi_irq_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct napi_struct *napi = + 
container_of(notify, struct napi_struct, notify); +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap; + int err; +#endif + + if (napi->config && napi->dev->irq_affinity_auto) + cpumask_copy(&napi->config->affinity_mask, mask); + +#ifdef CONFIG_RFS_ACCEL + if (napi->dev->rx_cpu_rmap_auto) { + err = cpu_rmap_update(rmap, napi->napi_rmap_idx, mask); + if (err) + netdev_warn(napi->dev, "RMAP update failed (%d)\n", + err); + } +#endif +} + +#ifdef CONFIG_RFS_ACCEL +static void netif_napi_affinity_release(struct kref *ref) +{ + struct napi_struct *napi = + container_of(ref, struct napi_struct, notify.kref); + struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap; + + netdev_assert_locked(napi->dev); + WARN_ON(test_and_clear_bit(NAPI_STATE_HAS_NOTIFIER, + &napi->state)); + + if (!napi->dev->rx_cpu_rmap_auto) + return; + rmap->obj[napi->napi_rmap_idx] = NULL; + napi->napi_rmap_idx = -1; + cpu_rmap_put(rmap); +} + +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs) +{ + if (dev->rx_cpu_rmap_auto) + return 0; + + dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs); + if (!dev->rx_cpu_rmap) + return -ENOMEM; + + dev->rx_cpu_rmap_auto = true; + return 0; +} +EXPORT_SYMBOL(netif_enable_cpu_rmap); + +static void netif_del_cpu_rmap(struct net_device *dev) +{ + struct cpu_rmap *rmap = dev->rx_cpu_rmap; + + if (!dev->rx_cpu_rmap_auto) + return; + + /* Free the rmap */ + cpu_rmap_put(rmap); + dev->rx_cpu_rmap = NULL; + dev->rx_cpu_rmap_auto = false; +} + +#else +static void netif_napi_affinity_release(struct kref *ref) +{ +} + +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs) +{ + return 0; +} +EXPORT_SYMBOL(netif_enable_cpu_rmap); + +static void netif_del_cpu_rmap(struct net_device *dev) +{ +} +#endif + +void netif_set_affinity_auto(struct net_device *dev) +{ + unsigned int i, maxqs, numa; + + maxqs = max(dev->num_tx_queues, dev->num_rx_queues); + numa = dev_to_node(&dev->dev); + + for (i = 0; i < maxqs; i++) + 
cpumask_set_cpu(cpumask_local_spread(i, numa), + &dev->napi_config[i].affinity_mask); + + dev->irq_affinity_auto = true; +} +EXPORT_SYMBOL(netif_set_affinity_auto); + +void netif_napi_set_irq_locked(struct napi_struct *napi, int irq) +{ + int rc; + + netdev_assert_locked_or_invisible(napi->dev); + + if (napi->irq == irq) + return; + + /* Remove existing resources */ + if (test_and_clear_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state)) + irq_set_affinity_notifier(napi->irq, NULL); + + napi->irq = irq; + if (irq < 0 || + (!napi->dev->rx_cpu_rmap_auto && !napi->dev->irq_affinity_auto)) + return; + + /* Abort for buggy drivers */ + if (napi->dev->irq_affinity_auto && WARN_ON_ONCE(!napi->config)) + return; + +#ifdef CONFIG_RFS_ACCEL + if (napi->dev->rx_cpu_rmap_auto) { + rc = cpu_rmap_add(napi->dev->rx_cpu_rmap, napi); + if (rc < 0) + return; + + cpu_rmap_get(napi->dev->rx_cpu_rmap); + napi->napi_rmap_idx = rc; + } +#endif + + /* Use core IRQ notifier */ + napi->notify.notify = netif_napi_irq_notify; + napi->notify.release = netif_napi_affinity_release; + rc = irq_set_affinity_notifier(irq, &napi->notify); + if (rc) { + netdev_warn(napi->dev, "Unable to set IRQ notifier (%d)\n", + rc); + goto put_rmap; + } + + set_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state); + return; + +put_rmap: +#ifdef CONFIG_RFS_ACCEL + if (napi->dev->rx_cpu_rmap_auto) { + napi->dev->rx_cpu_rmap->obj[napi->napi_rmap_idx] = NULL; + cpu_rmap_put(napi->dev->rx_cpu_rmap); + napi->napi_rmap_idx = -1; + } +#endif + napi->notify.notify = NULL; + napi->notify.release = NULL; +} +EXPORT_SYMBOL(netif_napi_set_irq_locked); + static void napi_restore_config(struct napi_struct *n) { n->defer_hard_irqs = n->config->defer_hard_irqs; n->gro_flush_timeout = n->config->gro_flush_timeout; n->irq_suspend_timeout = n->config->irq_suspend_timeout; + + if (n->dev->irq_affinity_auto && + test_bit(NAPI_STATE_HAS_NOTIFIER, &n->state)) + irq_set_affinity(n->irq, &n->config->affinity_mask); + /* a NAPI ID might be stored in the 
config, if so use it. if not, use * napi_hash_add to generate one for us. */ @@ -6974,7 +7122,7 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) higher = &dev->napi_list; list_for_each_entry(pos, &dev->napi_list, dev_list) { - if (pos->napi_id >= MIN_NAPI_ID) + if (napi_id_valid(pos->napi_id)) pos_id = pos->napi_id; else if (pos->config) pos_id = pos->config->napi_id; @@ -7016,12 +7164,9 @@ void netif_napi_add_weight_locked(struct net_device *dev, INIT_LIST_HEAD(&napi->poll_list); INIT_HLIST_NODE(&napi->napi_hash_node); - hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); - napi->timer.function = napi_watchdog; - init_gro_hash(napi); + hrtimer_setup(&napi->timer, napi_watchdog, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + gro_init(&napi->gro); napi->skb = NULL; - INIT_LIST_HEAD(&napi->rx_list); - napi->rx_count = 0; napi->poll = poll; if (weight > NAPI_POLL_WEIGHT) netdev_err_once(dev, "%s() called with weight %d\n", __func__, @@ -7135,19 +7280,6 @@ void napi_enable(struct napi_struct *n) } EXPORT_SYMBOL(napi_enable); -static void flush_gro_hash(struct napi_struct *napi) -{ - int i; - - for (i = 0; i < GRO_HASH_BUCKETS; i++) { - struct sk_buff *skb, *n; - - list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list) - kfree_skb(skb); - napi->gro_hash[i].count = 0; - } -} - /* Must be called in process context */ void __netif_napi_del_locked(struct napi_struct *napi) { @@ -7156,6 +7288,12 @@ void __netif_napi_del_locked(struct napi_struct *napi) if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; + /* Make sure NAPI is disabled (or was never enabled). 
*/ + WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state)); + + if (test_and_clear_bit(NAPI_STATE_HAS_NOTIFIER, &napi->state)) + irq_set_affinity_notifier(napi->irq, NULL); + if (napi->config) { napi->index = -1; napi->config = NULL; @@ -7164,8 +7302,7 @@ void __netif_napi_del_locked(struct napi_struct *napi) list_del_rcu(&napi->dev_list); napi_free_frags(napi); - flush_gro_hash(napi); - napi->gro_bitmask = 0; + gro_cleanup(&napi->gro); if (napi->thread) { kthread_stop(napi->thread); @@ -7224,14 +7361,9 @@ static int __napi_poll(struct napi_struct *n, bool *repoll) return work; } - if (n->gro_bitmask) { - /* flush too old packets - * If HZ < 1000, flush all packets. - */ - napi_gro_flush(n, HZ >= 1000); - } - - gro_normal_list(n); + /* Flush too old packets. If HZ < 1000, flush all packets */ + gro_flush(&n->gro, HZ >= 1000); + gro_normal_list(&n->gro); /* Some drivers may have called napi_schedule * prior to exhausting their budget. @@ -9090,7 +9222,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc) } EXPORT_SYMBOL(dev_set_promiscuity); -static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) +int netif_set_allmulti(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags, old_gflags = dev->gflags; unsigned int allmulti, flags; @@ -9125,25 +9257,6 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) return 0; } -/** - * dev_set_allmulti - update allmulti count on a device - * @dev: device - * @inc: modifier - * - * Add or remove reception of all multicast frames to a device. While the - * count in the device remains above zero the interface remains listening - * to all interfaces. Once it hits zero the device reverts back to normal - * filtering operation. A negative @inc value is used to drop the counter - * when releasing a resource needing all multicasts. - * Return 0 if successful or a negative errno code on error. 
- */ - -int dev_set_allmulti(struct net_device *dev, int inc) -{ - return __dev_set_allmulti(dev, inc, true); -} -EXPORT_SYMBOL(dev_set_allmulti); - /* * Upload unicast and multicast address lists to device and * configure RX filtering. When the device doesn't support unicast @@ -9259,7 +9372,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; - unsigned int old_flags = dev->flags; + old_flags = dev->flags; dev->gflags ^= IFF_PROMISC; @@ -9276,7 +9389,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, int inc = (flags & IFF_ALLMULTI) ? 1 : -1; dev->gflags ^= IFF_ALLMULTI; - __dev_set_allmulti(dev, inc, false); + netif_set_allmulti(dev, inc, false); } return ret; @@ -9311,17 +9424,8 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, } } -/** - * dev_change_flags - change device settings - * @dev: device - * @flags: device state flags - * @extack: netlink extended ack - * - * Change settings on device based state flags. The flags are - * in the userspace exported format. - */ -int dev_change_flags(struct net_device *dev, unsigned int flags, - struct netlink_ext_ack *extack) +int netif_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack) { int ret; unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; @@ -9334,7 +9438,6 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, __dev_notify_flags(dev, old_flags, changes, 0, NULL); return ret; } -EXPORT_SYMBOL(dev_change_flags); int __dev_set_mtu(struct net_device *dev, int new_mtu) { @@ -9366,15 +9469,15 @@ int dev_validate_mtu(struct net_device *dev, int new_mtu, } /** - * dev_set_mtu_ext - Change maximum transfer unit + * netif_set_mtu_ext - Change maximum transfer unit * @dev: device * @new_mtu: new transfer unit * @extack: netlink extended ack * * Change the maximum transfer size of the network device. 
*/ -int dev_set_mtu_ext(struct net_device *dev, int new_mtu, - struct netlink_ext_ack *extack) +int netif_set_mtu_ext(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack) { int err, orig_mtu; @@ -9412,25 +9515,20 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, return err; } -int dev_set_mtu(struct net_device *dev, int new_mtu) +int netif_set_mtu(struct net_device *dev, int new_mtu) { struct netlink_ext_ack extack; int err; memset(&extack, 0, sizeof(extack)); - err = dev_set_mtu_ext(dev, new_mtu, &extack); + err = netif_set_mtu_ext(dev, new_mtu, &extack); if (err && extack._msg) net_err_ratelimited("%s: %s\n", dev->name, extack._msg); return err; } -EXPORT_SYMBOL(dev_set_mtu); +EXPORT_SYMBOL(netif_set_mtu); -/** - * dev_change_tx_queue_len - Change TX queue length of a netdevice - * @dev: device - * @new_len: new tx queue length - */ -int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) +int netif_change_tx_queue_len(struct net_device *dev, unsigned long new_len) { unsigned int orig_len = dev->tx_queue_len; int res; @@ -9457,12 +9555,7 @@ err_rollback: return res; } -/** - * dev_set_group - Change group this device belongs to - * @dev: device - * @new_group: group this device should belong to - */ -void dev_set_group(struct net_device *dev, int new_group) +void netif_set_group(struct net_device *dev, int new_group) { dev->group = new_group; } @@ -9488,16 +9581,8 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, } EXPORT_SYMBOL(dev_pre_changeaddr_notify); -/** - * dev_set_mac_address - Change Media Access Control Address - * @dev: device - * @sa: new address - * @extack: netlink extended ack - * - * Change the hardware (MAC) address of the device - */ -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack) +int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) { const struct net_device_ops *ops 
= dev->netdev_ops; int err; @@ -9521,22 +9606,9 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, add_device_randomness(dev->dev_addr, dev->addr_len); return 0; } -EXPORT_SYMBOL(dev_set_mac_address); DECLARE_RWSEM(dev_addr_sem); -int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack) -{ - int ret; - - down_write(&dev_addr_sem); - ret = dev_set_mac_address(dev, sa, extack); - up_write(&dev_addr_sem); - return ret; -} -EXPORT_SYMBOL(dev_set_mac_address_user); - int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { size_t size = sizeof(sa->sa_data_min); @@ -9565,14 +9637,7 @@ unlock: } EXPORT_SYMBOL(dev_get_mac_address); -/** - * dev_change_carrier - Change device carrier - * @dev: device - * @new_carrier: new value - * - * Change device carrier - */ -int dev_change_carrier(struct net_device *dev, bool new_carrier) +int netif_change_carrier(struct net_device *dev, bool new_carrier) { const struct net_device_ops *ops = dev->netdev_ops; @@ -9683,13 +9748,7 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); -/** - * dev_change_proto_down - set carrier according to proto_down. 
- * - * @dev: device - * @proto_down: new value - */ -int dev_change_proto_down(struct net_device *dev, bool proto_down) +int netif_change_proto_down(struct net_device *dev, bool proto_down) { if (!dev->change_proto_down) return -EOPNOTSUPP; @@ -9704,14 +9763,14 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } /** - * dev_change_proto_down_reason - proto down reason + * netdev_change_proto_down_reason_locked - proto down reason * * @dev: device * @mask: proto down mask * @value: proto down value */ -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value) +void netdev_change_proto_down_reason_locked(struct net_device *dev, + unsigned long mask, u32 value) { u32 proto_down_reason; int b; @@ -9800,7 +9859,7 @@ u8 dev_xdp_sb_prog_count(struct net_device *dev) return count; } -int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) +int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) { if (!dev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; @@ -9820,7 +9879,6 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) return dev->netdev_ops->ndo_bpf(dev, bpf); } -EXPORT_SYMBOL_GPL(dev_xdp_propagate); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { @@ -9850,6 +9908,8 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, struct netdev_bpf xdp; int err; + netdev_ops_assert_locked(dev); + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && prog && !prog->aux->xdp_has_frags) { NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split"); @@ -10082,7 +10142,9 @@ static void bpf_xdp_link_release(struct bpf_link *link) * already NULL, in which case link was already auto-detached */ if (xdp_link->dev) { + netdev_lock_ops(xdp_link->dev); WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); + netdev_unlock_ops(xdp_link->dev); xdp_link->dev = NULL; } @@ -10164,10 +10226,12 @@ static int 
bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } + netdev_lock_ops(xdp_link->dev); mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, xdp_link->flags, new_prog); + netdev_unlock_ops(xdp_link->dev); if (err) goto out_unlock; @@ -10293,7 +10357,7 @@ u32 dev_get_min_mp_channel_count(const struct net_device *dev) { int i; - ASSERT_RTNL(); + netdev_ops_assert_locked(dev); for (i = dev->real_num_rx_queues - 1; i >= 0; i--) if (dev->_rx[i].mp_params.mp_priv) @@ -10519,6 +10583,7 @@ int __netdev_update_features(struct net_device *dev) int err = -1; ASSERT_RTNL(); + netdev_ops_assert_locked(dev); features = netdev_get_wanted_features(dev); @@ -10952,7 +11017,9 @@ int register_netdevice(struct net_device *dev) if (ret) goto err_uninit_notify; + netdev_lock_ops(dev); __netdev_update_features(dev); + netdev_unlock_ops(dev); /* * Default initial state at registry is that the @@ -11197,9 +11264,8 @@ void netdev_run_todo(void) list_replace_init(&net_unlink_list, &unlink_list); while (!list_empty(&unlink_list)) { - struct net_device *dev = list_first_entry(&unlink_list, - struct net_device, - unlink_list); + dev = list_first_entry(&unlink_list, struct net_device, + unlink_list); list_del_init(&dev->unlink_list); dev->nested_level = dev->lower_level - 1; } @@ -11709,6 +11775,8 @@ void free_netdev(struct net_device *dev) netdev_napi_exit(dev); + netif_del_cpu_rmap(dev); + ref_tracker_dir_exit(&dev->refcnt_tracker); #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); @@ -11823,6 +11891,19 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL(unregister_netdevice_queue); +static void dev_memory_provider_uninstall(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->real_num_rx_queues; i++) { + struct netdev_rx_queue *rxq = &dev->_rx[i]; + struct pp_memory_provider_params 
*p = &rxq->mp_params; + + if (p->mp_ops && p->mp_ops->uninstall) + p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq); + } +} + void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh) { @@ -11854,11 +11935,14 @@ void unregister_netdevice_many_notify(struct list_head *head, } /* If device is running, close it first. */ - list_for_each_entry(dev, head, unreg_list) + list_for_each_entry(dev, head, unreg_list) { list_add_tail(&dev->close_list, &close_head); + netdev_lock_ops(dev); + } dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { + netdev_unlock_ops(dev); /* And unlink it from device chain. */ unlist_netdevice(dev); netdev_lock(dev); @@ -11875,9 +11959,11 @@ void unregister_netdevice_many_notify(struct list_head *head, /* Shutdown queueing discipline. */ dev_shutdown(dev); dev_tcx_uninstall(dev); + netdev_lock_ops(dev); dev_xdp_uninstall(dev); + dev_memory_provider_uninstall(dev); + netdev_unlock_ops(dev); bpf_dev_bound_netdev_unregister(dev); - dev_dmabuf_uninstall(dev); netdev_offload_xstats_disable_all(dev); @@ -11971,24 +12057,9 @@ void unregister_netdev(struct net_device *dev) } EXPORT_SYMBOL(unregister_netdev); -/** - * __dev_change_net_namespace - move device to different nethost namespace - * @dev: device - * @net: network namespace - * @pat: If not NULL name pattern to try if the current device name - * is already taken in the destination network namespace. - * @new_ifindex: If not zero, specifies device index in the target - * namespace. - * - * This function shuts down a device interface and moves it - * to a new network namespace. On success 0 is returned, on - * a failure a netagive errno code is returned. - * - * Callers must hold the rtnl semaphore. 
- */ - -int __dev_change_net_namespace(struct net_device *dev, struct net *net, - const char *pat, int new_ifindex) +int netif_change_net_namespace(struct net_device *dev, struct net *net, + const char *pat, int new_ifindex, + struct netlink_ext_ack *extack) { struct netdev_name_node *name_node; struct net *net_old = dev_net(dev); @@ -11999,12 +12070,16 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, /* Don't allow namespace local devices to be moved. */ err = -EINVAL; - if (dev->netns_local) + if (dev->netns_immutable) { + NL_SET_ERR_MSG(extack, "The interface netns is immutable"); goto out; + } /* Ensure the device has been registered */ - if (dev->reg_state != NETREG_REGISTERED) + if (dev->reg_state != NETREG_REGISTERED) { + NL_SET_ERR_MSG(extack, "The interface isn't registered"); goto out; + } /* Get out if there is nothing todo */ err = 0; @@ -12017,30 +12092,49 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, err = -EEXIST; if (netdev_name_in_use(net, dev->name)) { /* We get here if we can't use the current device name */ - if (!pat) + if (!pat) { + NL_SET_ERR_MSG(extack, + "An interface with the same name exists in the target netns"); goto out; + } err = dev_prep_valid_name(net, dev, pat, new_name, EEXIST); - if (err < 0) + if (err < 0) { + NL_SET_ERR_MSG_FMT(extack, + "Unable to use '%s' for the new interface name in the target netns", + pat); goto out; + } } /* Check that none of the altnames conflicts. */ err = -EEXIST; - netdev_for_each_altname(dev, name_node) - if (netdev_name_in_use(net, name_node->name)) + netdev_for_each_altname(dev, name_node) { + if (netdev_name_in_use(net, name_node->name)) { + NL_SET_ERR_MSG_FMT(extack, + "An interface with the altname %s exists in the target netns", + name_node->name); goto out; + } + } /* Check that new_ifindex isn't used yet. 
*/ if (new_ifindex) { err = dev_index_reserve(net, new_ifindex); - if (err < 0) + if (err < 0) { + NL_SET_ERR_MSG_FMT(extack, + "The ifindex %d is not available in the target netns", + new_ifindex); goto out; + } } else { /* If there is an ifindex conflict assign a new one */ err = dev_index_reserve(net, dev->ifindex); if (err == -EBUSY) err = dev_index_reserve(net, 0); - if (err < 0) + if (err < 0) { + NL_SET_ERR_MSG(extack, + "Unable to allocate a new ifindex in the target netns"); goto out; + } new_ifindex = err; } @@ -12049,7 +12143,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, */ /* If device is running close it first. */ - dev_close(dev); + netif_close(dev); /* And unlink it from device chain */ unlist_netdevice(dev); @@ -12131,7 +12225,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, out: return err; } -EXPORT_SYMBOL_GPL(__dev_change_net_namespace); static int dev_cpu_dead(unsigned int oldcpu) { @@ -12246,7 +12339,7 @@ static struct hlist_head * __net_init netdev_create_hash(void) static int __net_init netdev_init(struct net *net) { BUILD_BUG_ON(GRO_HASH_BUCKETS > - 8 * sizeof_field(struct napi_struct, gro_bitmask)); + BITS_PER_BYTE * sizeof_field(struct gro_node, bitmask)); INIT_LIST_HEAD(&net->dev_base_head); @@ -12381,7 +12474,7 @@ static void __net_exit default_device_exit_net(struct net *net) char fb_name[IFNAMSIZ]; /* Ignore unmoveable devices (i.e. 
loopback) */ - if (dev->netns_local) + if (dev->netns_immutable) continue; /* Leave virtual devices for the generic cleanup */ @@ -12611,7 +12704,7 @@ static int __init net_dev_init(void) INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); spin_lock_init(&sd->defer_lock); - init_gro_hash(&sd->backlog); + gro_init(&sd->backlog.gro); sd->backlog.poll = process_backlog; sd->backlog.weight = weight_p; INIT_LIST_HEAD(&sd->backlog.poll_list); diff --git a/net/core/dev.h b/net/core/dev.h index a5b166bbd169..7ee203395d8e 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/rwsem.h> #include <linux/netdevice.h> +#include <net/netdev_lock.h> struct net; struct netlink_ext_ack; @@ -85,6 +86,7 @@ struct netdev_name_node { }; int netdev_get_name(struct net *net, char *name, int ifindex); +int netif_change_name(struct net_device *dev, const char *newname); int dev_change_name(struct net_device *dev, const char *newname); #define netdev_for_each_altname(dev, namenode) \ @@ -98,24 +100,28 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); int dev_validate_mtu(struct net_device *dev, int mtu, struct netlink_ext_ack *extack); -int dev_set_mtu_ext(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); +int netif_set_mtu_ext(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); +int netif_change_proto_down(struct net_device *dev, bool proto_down); int dev_change_proto_down(struct net_device *dev, bool proto_down); -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value); +void netdev_change_proto_down_reason_locked(struct net_device *dev, + unsigned long mask, u32 value); typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device 
*dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); +int netif_change_tx_queue_len(struct net_device *dev, unsigned long new_len); int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len); +void netif_set_group(struct net_device *dev, int new_group); void dev_set_group(struct net_device *dev, int new_group); +int netif_change_carrier(struct net_device *dev, bool new_carrier); int dev_change_carrier(struct net_device *dev, bool new_carrier); void __dev_set_rx_mode(struct net_device *dev); @@ -134,9 +140,11 @@ static inline void netif_set_up(struct net_device *dev, bool value) else dev->flags &= ~IFF_UP; - netdev_lock(dev); + if (!netdev_need_ops_lock(dev)) + netdev_lock(dev); dev->up = value; - netdev_unlock(dev); + if (!netdev_need_ops_lock(dev)) + netdev_unlock(dev); } static inline void netif_set_gso_max_size(struct net_device *dev, @@ -299,6 +307,18 @@ void xdp_do_check_flushed(struct napi_struct *napi); static inline void xdp_do_check_flushed(struct napi_struct *napi) { } #endif +/* Best effort check that NAPI is not idle (can't be scheduled to run) */ +static inline void napi_assert_will_not_race(const struct napi_struct *napi) +{ + /* uninitialized instance, can't race */ + if (!napi->poll_list.next) + return; + + /* SCHED bit is set on disabled instances */ + WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state)); + WARN_ON(READ_ONCE(napi->list_owner) != -1); +} + void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); #define XMIT_RECURSION_LIMIT 8 diff --git a/net/core/dev_api.c b/net/core/dev_api.c new file mode 100644 index 000000000000..8dbc60612100 --- /dev/null +++ b/net/core/dev_api.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/netdevice.h> +#include <net/netdev_lock.h> + +#include "dev.h" + +/** + * dev_change_name() - change name of a device + * @dev: device + * @newname: name (or format string) must be at least IFNAMSIZ + * + * Change name of a device, can 
pass format strings "eth%d" + * for wildcarding. + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_name(struct net_device *dev, const char *newname) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_name(dev, newname); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_set_alias() - change ifalias of a device + * @dev: device + * @alias: name up to IFALIASZ + * @len: limit of bytes to copy from info + * + * Set ifalias for a device. + * + * Return: 0 on success, -errno on failure. + */ +int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_alias(dev, alias, len); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_alias); + +/** + * dev_change_flags() - change device settings + * @dev: device + * @flags: device state flags + * @extack: netlink extended ack + * + * Change settings on device based on state flags. The flags are + * in the userspace exported format. + * + * Return: 0 on success, -errno on failure.
+ */ +int dev_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_flags(dev, flags, extack); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_change_flags); + +/** + * dev_set_group() - change group this device belongs to + * @dev: device + * @new_group: group this device should belong to + */ +void dev_set_group(struct net_device *dev, int new_group) +{ + netdev_lock_ops(dev); + netif_set_group(dev, new_group); + netdev_unlock_ops(dev); +} + +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) +{ + int ret; + + down_write(&dev_addr_sem); + netdev_lock_ops(dev); + ret = netif_set_mac_address(dev, sa, extack); + netdev_unlock_ops(dev); + up_write(&dev_addr_sem); + + return ret; +} +EXPORT_SYMBOL(dev_set_mac_address_user); + +/** + * dev_change_net_namespace() - move device to different network namespace + * @dev: device + * @net: network namespace + * @pat: If not NULL name pattern to try if the current device name + * is already taken in the destination network namespace. + * + * This function shuts down a device interface and moves it + * to a new network namespace. On success 0 is returned, on + * a failure a negative errno code is returned. + * + * Callers must hold the rtnl semaphore. + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_net_namespace(struct net_device *dev, struct net *net, + const char *pat) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_net_namespace(dev, net, pat, 0, NULL); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_change_net_namespace); + +/** + * dev_change_carrier() - change device carrier + * @dev: device + * @new_carrier: new value + * + * Change device carrier + * + * Return: 0 on success, -errno on failure.
+ */ +int dev_change_carrier(struct net_device *dev, bool new_carrier) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_carrier(dev, new_carrier); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_change_tx_queue_len() - change TX queue length of a netdevice + * @dev: device + * @new_len: new tx queue length + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_tx_queue_len(dev, new_len); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_change_proto_down() - set carrier according to proto_down + * @dev: device + * @proto_down: new value + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_proto_down(struct net_device *dev, bool proto_down) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_proto_down(dev, proto_down); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_open() - prepare an interface for use + * @dev: device to open + * @extack: netlink extended ack + * + * Takes a device from down to up state. The device's private open + * function is invoked and then the multicast lists are loaded. Finally + * the device is moved into the up state and a %NETDEV_UP message is + * sent to the netdev notifier chain. + * + * Calling this function on an active interface is a nop. On a failure + * a negative errno code is returned. + * + * Return: 0 on success, -errno on failure. + */ +int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_open(dev, extack); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_open); + +/** + * dev_close() - shutdown an interface + * @dev: device to shutdown + * + * This function moves an active device into down state. A + * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. 
The device + * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier + * chain. + */ +void dev_close(struct net_device *dev) +{ + netdev_lock_ops(dev); + netif_close(dev); + netdev_unlock_ops(dev); +} +EXPORT_SYMBOL(dev_close); + +int dev_eth_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int ret = -ENODEV; + + if (!ops->ndo_eth_ioctl) + return -EOPNOTSUPP; + + netdev_lock_ops(dev); + if (netif_device_present(dev)) + ret = ops->ndo_eth_ioctl(dev, ifr, cmd); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_eth_ioctl); + +int dev_set_mtu(struct net_device *dev, int new_mtu) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_mtu(dev, new_mtu); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_mtu); + +/** + * dev_disable_lro() - disable Large Receive Offload on a device + * @dev: device + * + * Disable Large Receive Offload (LRO) on a net device. Must be + * called under RTNL. This is needed if received packets may be + * forwarded to another interface. + */ +void dev_disable_lro(struct net_device *dev) +{ + netdev_lock_ops(dev); + netif_disable_lro(dev); + netdev_unlock_ops(dev); +} +EXPORT_SYMBOL(dev_disable_lro); + +/** + * dev_set_allmulti() - update allmulti count on a device + * @dev: device + * @inc: modifier + * + * Add or remove reception of all multicast frames to a device. While the + * count in the device remains above zero the interface remains listening + * to all interfaces. Once it hits zero the device reverts back to normal + * filtering operation. A negative @inc value is used to drop the counter + * when releasing a resource needing all multicasts. + * + * Return: 0 on success, -errno on failure. 
+ */ + +int dev_set_allmulti(struct net_device *dev, int inc) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_allmulti(dev, inc, true); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_allmulti); + +/** + * dev_set_mac_address() - change Media Access Control Address + * @dev: device + * @sa: new address + * @extack: netlink extended ack + * + * Change the hardware (MAC) address of the device + * + * Return: 0 on success, -errno on failure. + */ +int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_mac_address(dev, sa, extack); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_mac_address); + +int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_xdp_propagate(dev, bpf); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_xdp_propagate); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4c2098ac9d72..fff13a8b48f1 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -10,6 +10,7 @@ #include <linux/wireless.h> #include <linux/if_bridge.h> #include <net/dsa_stubs.h> +#include <net/netdev_lock.h> #include <net/wext.h> #include "dev.h" @@ -110,7 +111,7 @@ static int dev_getifmap(struct net_device *dev, struct ifreq *ifr) return 0; } -static int dev_setifmap(struct net_device *dev, struct ifreq *ifr) +static int netif_setifmap(struct net_device *dev, struct ifreq *ifr) { struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map; @@ -240,20 +241,6 @@ int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg) return 0; } -static int dev_eth_ioctl(struct net_device *dev, - struct ifreq *ifr, unsigned int cmd) -{ - const struct net_device_ops *ops = dev->netdev_ops; - - if (!ops->ndo_eth_ioctl) - return -EOPNOTSUPP; - - if (!netif_device_present(dev)) - return -ENODEV; - - return 
ops->ndo_eth_ioctl(dev, ifr, cmd); -} - /** * dev_get_hwtstamp_phylib() - Get hardware timestamping settings of NIC * or of attached phylib PHY @@ -305,7 +292,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) return -ENODEV; kernel_cfg.ifr = ifr; + netdev_lock_ops(dev); err = dev_get_hwtstamp_phylib(dev, &kernel_cfg); + netdev_unlock_ops(dev); if (err) return err; @@ -429,7 +418,9 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) if (!netif_device_present(dev)) return -ENODEV; + netdev_lock_ops(dev); err = dev_set_hwtstamp_phylib(dev, &kernel_cfg, &extack); + netdev_unlock_ops(dev); if (err) return err; @@ -504,10 +495,14 @@ static int dev_siocbond(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocbond) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocbond(dev, ifr, cmd); - else - return -ENODEV; + ret = ops->ndo_siocbond(dev, ifr, cmd); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -519,10 +514,14 @@ static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocdevprivate) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocdevprivate(dev, ifr, data, cmd); - else - return -ENODEV; + ret = ops->ndo_siocdevprivate(dev, ifr, data, cmd); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -533,10 +532,14 @@ static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs) const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocwandev) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocwandev(dev, ifs); - else - return -ENODEV; + ret = ops->ndo_siocwandev(dev, ifs); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -551,7 +554,6 @@ static int dev_ifsioc(struct net *net, 
struct ifreq *ifr, void __user *data, int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); const struct net_device_ops *ops; - netdevice_tracker dev_tracker; if (!dev) return -ENODEV; @@ -580,11 +582,16 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); + netdev_lock_ops(dev); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + netdev_unlock_ops(dev); return 0; case SIOCSIFMAP: - return dev_setifmap(dev, ifr); + netdev_lock_ops(dev); + err = netif_setifmap(dev, ifr); + netdev_unlock_ops(dev); + return err; case SIOCADDMULTI: if (!ops->ndo_set_rx_mode || @@ -592,7 +599,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_lock_ops(dev); + err = dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_unlock_ops(dev); + return err; case SIOCDELMULTI: if (!ops->ndo_set_rx_mode || @@ -600,7 +610,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_lock_ops(dev); + err = dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_unlock_ops(dev); + return err; case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) @@ -614,22 +627,6 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, case SIOCWANDEV: return dev_siocwandev(dev, &ifr->ifr_settings); - case SIOCBRADDIF: - case SIOCBRDELIF: - if (!netif_device_present(dev)) - return -ENODEV; - if (!netif_is_bridge_master(dev)) - return -EOPNOTSUPP; - - netdev_hold(dev, &dev_tracker, GFP_KERNEL); - rtnl_net_unlock(net); - - err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); - - netdev_put(dev, &dev_tracker); - 
rtnl_net_lock(net); - return err; - case SIOCDEVPRIVATE ... SIOCDEVPRIVATE + 15: return dev_siocdevprivate(dev, ifr, data, cmd); @@ -812,8 +809,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - case SIOCBRADDIF: - case SIOCBRDELIF: case SIOCSHWTSTAMP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; diff --git a/net/core/devmem.c b/net/core/devmem.c index 0e5a2c672efd..ee145a2aa41c 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -16,6 +16,7 @@ #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <trace/events/page_pool.h> #include "devmem.h" @@ -24,23 +25,30 @@ /* Device memory support */ -/* Protected by rtnl_lock() */ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); +static const struct memory_provider_ops dmabuf_devmem_ops; + +bool net_is_devmem_iov(struct net_iov *niov) +{ + return niov->pp->mp_ops == &dmabuf_devmem_ops; +} + static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, struct gen_pool_chunk *chunk, void *not_used) { struct dmabuf_genpool_chunk_owner *owner = chunk->owner; - kvfree(owner->niovs); + kvfree(owner->area.niovs); kfree(owner); } static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov) { - struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); + struct dmabuf_genpool_chunk_owner *owner; + owner = net_devmem_iov_to_chunk_owner(niov); return owner->base_dma_addr + ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT); } @@ -83,7 +91,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) offset = dma_addr - owner->base_dma_addr; index = offset / PAGE_SIZE; - niov = &owner->niovs[index]; + niov = &owner->area.niovs[index]; niov->pp_magic = 0; niov->pp = NULL; @@ -94,7 +102,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) void 
net_devmem_free_dmabuf(struct net_iov *niov) { - struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov); + struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov); unsigned long dma_addr = net_devmem_get_dma_addr(niov); if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr, @@ -118,6 +126,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) WARN_ON(rxq->mp_params.mp_priv != binding); rxq->mp_params.mp_priv = NULL; + rxq->mp_params.mp_ops = NULL; rxq_idx = get_netdev_rx_queue_index(rxq); @@ -154,7 +163,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, } rxq = __netif_get_rx_queue(dev, rxq_idx); - if (rxq->mp_params.mp_priv) { + if (rxq->mp_params.mp_ops) { NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); return -EEXIST; } @@ -172,6 +181,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return err; rxq->mp_params.mp_priv = binding; + rxq->mp_params.mp_ops = &dmabuf_devmem_ops; err = netdev_rx_queue_restart(dev, rxq_idx); if (err) @@ -181,6 +191,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, err_xa_erase: rxq->mp_params.mp_priv = NULL; + rxq->mp_params.mp_ops = NULL; xa_erase(&binding->bound_rxqs, xa_idx); return err; @@ -263,9 +274,9 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, goto err_free_chunks; } - owner->base_virtual = virtual; + owner->area.base_virtual = virtual; owner->base_dma_addr = dma_addr; - owner->num_niovs = len / PAGE_SIZE; + owner->area.num_niovs = len / PAGE_SIZE; owner->binding = binding; err = gen_pool_add_owner(binding->chunk_pool, dma_addr, @@ -277,17 +288,17 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, goto err_free_chunks; } - owner->niovs = kvmalloc_array(owner->num_niovs, - sizeof(*owner->niovs), - GFP_KERNEL); - if (!owner->niovs) { + owner->area.niovs = kvmalloc_array(owner->area.num_niovs, + 
sizeof(*owner->area.niovs), + GFP_KERNEL); + if (!owner->area.niovs) { err = -ENOMEM; goto err_free_chunks; } - for (i = 0; i < owner->num_niovs; i++) { - niov = &owner->niovs[i]; - niov->owner = owner; + for (i = 0; i < owner->area.num_niovs; i++) { + niov = &owner->area.niovs[i]; + niov->owner = &owner->area; page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); } @@ -315,26 +326,6 @@ err_put_dmabuf: return ERR_PTR(err); } -void dev_dmabuf_uninstall(struct net_device *dev) -{ - struct net_devmem_dmabuf_binding *binding; - struct netdev_rx_queue *rxq; - unsigned long xa_idx; - unsigned int i; - - for (i = 0; i < dev->real_num_rx_queues; i++) { - binding = dev->_rx[i].mp_params.mp_priv; - if (!binding) - continue; - - xa_for_each(&binding->bound_rxqs, xa_idx, rxq) - if (rxq == &dev->_rx[i]) { - xa_erase(&binding->bound_rxqs, xa_idx); - break; - } - } -} - /*** "Dmabuf devmem memory provider" ***/ int mp_dmabuf_devmem_init(struct page_pool *pool) @@ -400,3 +391,36 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem) /* We don't want the page pool put_page()ing our net_iovs. */ return false; } + +static int mp_dmabuf_devmem_nl_fill(void *mp_priv, struct sk_buff *rsp, + struct netdev_rx_queue *rxq) +{ + const struct net_devmem_dmabuf_binding *binding = mp_priv; + int type = rxq ? 
NETDEV_A_QUEUE_DMABUF : NETDEV_A_PAGE_POOL_DMABUF; + + return nla_put_u32(rsp, type, binding->id); +} + +static void mp_dmabuf_devmem_uninstall(void *mp_priv, + struct netdev_rx_queue *rxq) +{ + struct net_devmem_dmabuf_binding *binding = mp_priv; + struct netdev_rx_queue *bound_rxq; + unsigned long xa_idx; + + xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) { + if (bound_rxq == rxq) { + xa_erase(&binding->bound_rxqs, xa_idx); + break; + } + } +} + +static const struct memory_provider_ops dmabuf_devmem_ops = { + .init = mp_dmabuf_devmem_init, + .destroy = mp_dmabuf_devmem_destroy, + .alloc_netmems = mp_dmabuf_devmem_alloc_netmems, + .release_netmem = mp_dmabuf_devmem_release_page, + .nl_fill = mp_dmabuf_devmem_nl_fill, + .uninstall = mp_dmabuf_devmem_uninstall, +}; diff --git a/net/core/devmem.h b/net/core/devmem.h index 76099ef9c482..7fc158d52729 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -10,6 +10,8 @@ #ifndef _NET_DEVMEM_H #define _NET_DEVMEM_H +#include <net/netmem.h> + struct netlink_ext_ack; struct net_devmem_dmabuf_binding { @@ -51,17 +53,11 @@ struct net_devmem_dmabuf_binding { * allocations from this chunk. */ struct dmabuf_genpool_chunk_owner { - /* Offset into the dma-buf where this chunk starts. */ - unsigned long base_virtual; + struct net_iov_area area; + struct net_devmem_dmabuf_binding *binding; /* dma_addr of the start of the chunk. */ dma_addr_t base_dma_addr; - - /* Array of net_iovs for this chunk. 
*/ - struct net_iov *niovs; - size_t num_niovs; - - struct net_devmem_dmabuf_binding *binding; }; void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding); @@ -72,38 +68,34 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding); int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack); -void dev_dmabuf_uninstall(struct net_device *dev); static inline struct dmabuf_genpool_chunk_owner * -net_iov_owner(const struct net_iov *niov) +net_devmem_iov_to_chunk_owner(const struct net_iov *niov) { - return niov->owner; + struct net_iov_area *owner = net_iov_owner(niov); + + return container_of(owner, struct dmabuf_genpool_chunk_owner, area); } -static inline unsigned int net_iov_idx(const struct net_iov *niov) +static inline struct net_devmem_dmabuf_binding * +net_devmem_iov_binding(const struct net_iov *niov) { - return niov - net_iov_owner(niov)->niovs; + return net_devmem_iov_to_chunk_owner(niov)->binding; } -static inline struct net_devmem_dmabuf_binding * -net_iov_binding(const struct net_iov *niov) +static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) { - return net_iov_owner(niov)->binding; + return net_devmem_iov_binding(niov)->id; } static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) { - struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); + struct net_iov_area *owner = net_iov_owner(niov); return owner->base_virtual + ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT); } -static inline u32 net_iov_binding_id(const struct net_iov *niov) -{ - return net_iov_owner(niov)->binding->id; -} - static inline void net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding) { @@ -123,6 +115,8 @@ struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding); void net_devmem_free_dmabuf(struct net_iov *ppiov); +bool net_is_devmem_iov(struct net_iov *niov); + 
#else struct net_devmem_dmabuf_binding; @@ -152,10 +146,6 @@ net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -EOPNOTSUPP; } -static inline void dev_dmabuf_uninstall(struct net_device *dev) -{ -} - static inline struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) { @@ -171,10 +161,15 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) return 0; } -static inline u32 net_iov_binding_id(const struct net_iov *niov) +static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) { return 0; } + +static inline bool net_is_devmem_iov(struct net_iov *niov) +{ + return false; +} #endif #endif /* _NET_DEVMEM_H */ diff --git a/net/core/dst.c b/net/core/dst.c index 9552a90d4772..c99b95cf9cbb 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -286,7 +286,8 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, { struct metadata_dst *md_dst; - md_dst = kmalloc(sizeof(*md_dst) + optslen, flags); + md_dst = kmalloc(struct_size(md_dst, u.tun_info.options, optslen), + flags); if (!md_dst) return NULL; @@ -314,7 +315,8 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) int cpu; struct metadata_dst __percpu *md_dst; - md_dst = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen, + md_dst = __alloc_percpu_gfp(struct_size(md_dst, u.tun_info.options, + optslen), __alignof__(struct metadata_dst), flags); if (!md_dst) return NULL; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 94a7872ab231..4bc64d912a1c 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -373,7 +373,8 @@ static int call_fib_rule_notifiers(struct net *net, .rule = rule, }; - ASSERT_RTNL(); + ASSERT_RTNL_NET(net); + /* Paired with READ_ONCE() in fib_rules_seq() */ WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1); return call_fib_notifiers(net, event_type, &info.info); @@ -461,9 +462,6 @@ static struct fib_rule *rule_find(struct 
fib_rules_ops *ops, if (rule->tun_id && r->tun_id != rule->tun_id) continue; - if (r->fr_net != rule->fr_net) - continue; - if (rule->l3mdev && r->l3mdev != rule->l3mdev) continue; @@ -483,11 +481,17 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops, &rule->sport_range)) continue; + if (rule->sport_mask && r->sport_mask != rule->sport_mask) + continue; + if (fib_rule_port_range_set(&rule->dport_range) && !fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; + if (rule->dport_mask && r->dport_mask != rule->dport_mask) + continue; + if (!ops->compare(r, frh, tb)) continue; return r; @@ -517,14 +521,40 @@ static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, } #endif -static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, +static int fib_nl2rule_port_mask(const struct nlattr *mask_attr, + const struct fib_rule_port_range *range, + u16 *port_mask, + struct netlink_ext_ack *extack) +{ + if (!fib_rule_port_range_valid(range)) { + NL_SET_ERR_MSG_ATTR(extack, mask_attr, + "Cannot specify port mask without port value"); + return -EINVAL; + } + + if (fib_rule_port_is_range(range)) { + NL_SET_ERR_MSG_ATTR(extack, mask_attr, + "Cannot specify port mask for port range"); + return -EINVAL; + } + + if (range->start & ~nla_get_u16(mask_attr)) { + NL_SET_ERR_MSG_ATTR(extack, mask_attr, "Invalid port mask"); + return -EINVAL; + } + + *port_mask = nla_get_u16(mask_attr); + + return 0; +} + +static int fib_nl2rule(struct net *net, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct fib_rules_ops *ops, struct nlattr *tb[], struct fib_rule **rule, bool *user_priority) { - struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rule *nlrule = NULL; int err = -EINVAL; @@ -556,30 +586,18 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[FRA_PRIORITY]) { nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]); *user_priority = true; - } else { - nlrule->pref = 
fib_default_rule_pref(ops); } nlrule->proto = nla_get_u8_default(tb[FRA_PROTOCOL], RTPROT_UNSPEC); if (tb[FRA_IIFNAME]) { - struct net_device *dev; - nlrule->iifindex = -1; nla_strscpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, nlrule->iifname); - if (dev) - nlrule->iifindex = dev->ifindex; } if (tb[FRA_OIFNAME]) { - struct net_device *dev; - nlrule->oifindex = -1; nla_strscpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, nlrule->oifname); - if (dev) - nlrule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { @@ -621,11 +639,6 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, } nlrule->target = nla_get_u32(tb[FRA_GOTO]); - /* Backward jumps are prohibited to avoid endless loops */ - if (nlrule->target <= nlrule->pref) { - NL_SET_ERR_MSG(extack, "Backward goto not supported"); - goto errout_free; - } } else if (nlrule->action == FR_ACT_GOTO) { NL_SET_ERR_MSG(extack, "Missing goto target for action goto"); goto errout_free; @@ -664,6 +677,16 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, NL_SET_ERR_MSG(extack, "Invalid sport range"); goto errout_free; } + if (!fib_rule_port_is_range(&nlrule->sport_range)) + nlrule->sport_mask = U16_MAX; + } + + if (tb[FRA_SPORT_MASK]) { + err = fib_nl2rule_port_mask(tb[FRA_SPORT_MASK], + &nlrule->sport_range, + &nlrule->sport_mask, extack); + if (err) + goto errout_free; } if (tb[FRA_DPORT_RANGE]) { @@ -673,6 +696,16 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, NL_SET_ERR_MSG(extack, "Invalid dport range"); goto errout_free; } + if (!fib_rule_port_is_range(&nlrule->dport_range)) + nlrule->dport_mask = U16_MAX; + } + + if (tb[FRA_DPORT_MASK]) { + err = fib_nl2rule_port_mask(tb[FRA_DPORT_MASK], + &nlrule->dport_range, + &nlrule->dport_mask, extack); + if (err) + goto errout_free; } *rule = nlrule; @@ -685,6 +718,39 @@ errout: return err; } +static int fib_nl2rule_rtnl(struct fib_rule *nlrule, + struct 
fib_rules_ops *ops, + struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + if (!tb[FRA_PRIORITY]) + nlrule->pref = fib_default_rule_pref(ops); + + /* Backward jumps are prohibited to avoid endless loops */ + if (tb[FRA_GOTO] && nlrule->target <= nlrule->pref) { + NL_SET_ERR_MSG(extack, "Backward goto not supported"); + return -EINVAL; + } + + if (tb[FRA_IIFNAME]) { + struct net_device *dev; + + dev = __dev_get_by_name(nlrule->fr_net, nlrule->iifname); + if (dev) + nlrule->iifindex = dev->ifindex; + } + + if (tb[FRA_OIFNAME]) { + struct net_device *dev; + + dev = __dev_get_by_name(nlrule->fr_net, nlrule->oifname); + if (dev) + nlrule->oifindex = dev->ifindex; + } + + return 0; +} + static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rule *rule) { @@ -721,9 +787,6 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, if (r->tun_id != rule->tun_id) continue; - if (r->fr_net != rule->fr_net) - continue; - if (r->l3mdev != rule->l3mdev) continue; @@ -741,10 +804,16 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, &rule->sport_range)) continue; + if (r->sport_mask != rule->sport_mask) + continue; + if (!fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; + if (r->dport_mask != rule->dport_mask) + continue; + if (!ops->compare(r, frh, tb)) continue; return 1; @@ -774,17 +843,19 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2), [FRA_FLOWLABEL] = { .type = NLA_BE32 }, [FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 }, + [FRA_SPORT_MASK] = { .type = NLA_U16 }, + [FRA_DPORT_MASK] = { .type = NLA_U16 }, + [FRA_DSCP_MASK] = NLA_POLICY_MASK(NLA_U8, INET_DSCP_MASK >> 2), }; -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack 
*extack, bool rtnl_held) { - struct net *net = sock_net(skb->sk); + struct fib_rule *rule = NULL, *r, *last = NULL; struct fib_rule_hdr *frh = nlmsg_data(nlh); + int err = -EINVAL, unresolved = 0; struct fib_rules_ops *ops = NULL; - struct fib_rule *rule = NULL, *r, *last = NULL; struct nlattr *tb[FRA_MAX + 1]; - int err = -EINVAL, unresolved = 0; bool user_priority = false; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { @@ -806,10 +877,17 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority); + err = fib_nl2rule(net, nlh, extack, ops, tb, &rule, &user_priority); if (err) goto errout; + if (!rtnl_held) + rtnl_net_lock(net); + + err = fib_nl2rule_rtnl(rule, ops, tb, extack); + if (err) + goto errout_free; + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -871,29 +949,42 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); + fib_rule_get(rule); + + if (!rtnl_held) + rtnl_net_unlock(net); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); + fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); return 0; errout_free: + if (!rtnl_held) + rtnl_net_unlock(net); kfree(rule); errout: rules_ops_put(ops); return err; } -EXPORT_SYMBOL_GPL(fib_nl_newrule); +EXPORT_SYMBOL_GPL(fib_newrule); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); + return fib_newrule(sock_net(skb->sk), skb, nlh, extack, false); +} + +int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held) +{ + struct fib_rule *rule = NULL, *nlrule = NULL; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = 
NULL; - struct fib_rule *rule = NULL, *r, *nlrule = NULL; struct nlattr *tb[FRA_MAX+1]; - int err = -EINVAL; bool user_priority = false; + int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid msg length"); @@ -914,25 +1005,32 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority); + err = fib_nl2rule(net, nlh, extack, ops, tb, &nlrule, &user_priority); if (err) goto errout; + if (!rtnl_held) + rtnl_net_lock(net); + + err = fib_nl2rule_rtnl(nlrule, ops, tb, extack); + if (err) + goto errout_free; + rule = rule_find(ops, frh, tb, nlrule, user_priority); if (!rule) { err = -ENOENT; - goto errout; + goto errout_free; } if (rule->flags & FIB_RULE_PERMANENT) { err = -EPERM; - goto errout; + goto errout_free; } if (ops->delete) { err = ops->delete(rule); if (err) - goto errout; + goto errout_free; } if (rule->tun_id) @@ -954,7 +1052,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, * current if it is goto rule, have actually been added. 
*/ if (ops->nr_goto_rules > 0) { - struct fib_rule *n; + struct fib_rule *n, *r; n = list_next_entry(rule, list); if (&n->list == &ops->rules_list || n->pref != rule->pref) @@ -968,22 +1066,33 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } } - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, - NULL); - notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).portid); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, NULL); + + if (!rtnl_held) + rtnl_net_unlock(net); + + notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); kfree(nlrule); return 0; -errout: +errout_free: + if (!rtnl_held) + rtnl_net_unlock(net); kfree(nlrule); +errout: rules_ops_put(ops); return err; } -EXPORT_SYMBOL_GPL(fib_nl_delrule); +EXPORT_SYMBOL_GPL(fib_delrule); + +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return fib_delrule(sock_net(skb->sk), skb, nlh, extack, false); +} static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) @@ -1002,7 +1111,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(1) /* FRA_PROTOCOL */ + nla_total_size(1) /* FRA_IP_PROTO */ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */ - + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */ + + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_DPORT_RANGE */ + + nla_total_size(2) /* FRA_SPORT_MASK */ + + nla_total_size(2); /* FRA_DPORT_MASK */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -1070,8 +1181,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, nla_put_uid_range(skb, &rule->uid_range)) || (fib_rule_port_range_set(&rule->sport_range) && nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) || + (rule->sport_mask && nla_put_u16(skb, 
FRA_SPORT_MASK, + rule->sport_mask)) || (fib_rule_port_range_set(&rule->dport_range) && nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) || + (rule->dport_mask && nla_put_u16(skb, FRA_DPORT_MASK, + rule->dport_mask)) || (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto))) goto nla_put_failure; @@ -1295,8 +1410,10 @@ static struct pernet_operations fib_rules_net_ops = { }; static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = { - {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule}, - {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule}, + {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule, + .flags = RTNL_FLAG_DOIT_PERNET}, + {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule, + .flags = RTNL_FLAG_DOIT_PERNET}, {.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; diff --git a/net/core/filter.c b/net/core/filter.c index 2ec162dd83c4..46ae8eb7a03c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5222,6 +5222,25 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static int sk_bpf_set_get_cb_flags(struct sock *sk, char *optval, bool getopt) +{ + u32 sk_bpf_cb_flags; + + if (getopt) { + *(u32 *)optval = sk->sk_bpf_cb_flags; + return 0; + } + + sk_bpf_cb_flags = *(u32 *)optval; + + if (sk_bpf_cb_flags & ~SK_BPF_CB_MASK) + return -EINVAL; + + sk->sk_bpf_cb_flags = sk_bpf_cb_flags; + + return 0; +} + static int sol_socket_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) @@ -5238,6 +5257,7 @@ static int sol_socket_sockopt(struct sock *sk, int optname, case SO_MAX_PACING_RATE: case SO_BINDTOIFINDEX: case SO_TXREHASH: + case SK_BPF_CB_FLAGS: if (*optlen != sizeof(int)) return -EINVAL; break; @@ -5247,6 +5267,9 @@ static int sol_socket_sockopt(struct sock *sk, int optname, return -EINVAL; } + if (optname == SK_BPF_CB_FLAGS) + return sk_bpf_set_get_cb_flags(sk, optval, getopt); + if (getopt) { if (optname == 
SO_BINDTODEVICE) return -EINVAL; @@ -5259,6 +5282,38 @@ static int sol_socket_sockopt(struct sock *sk, int optname, KERNEL_SOCKPTR(optval), *optlen); } +static int bpf_sol_tcp_getsockopt(struct sock *sk, int optname, + char *optval, int optlen) +{ + if (optlen != sizeof(int)) + return -EINVAL; + + switch (optname) { + case TCP_BPF_SOCK_OPS_CB_FLAGS: { + int cb_flags = tcp_sk(sk)->bpf_sock_ops_cb_flags; + + memcpy(optval, &cb_flags, optlen); + break; + } + case TCP_BPF_RTO_MIN: { + int rto_min_us = jiffies_to_usecs(inet_csk(sk)->icsk_rto_min); + + memcpy(optval, &rto_min_us, optlen); + break; + } + case TCP_BPF_DELACK_MAX: { + int delack_max_us = jiffies_to_usecs(inet_csk(sk)->icsk_delack_max); + + memcpy(optval, &delack_max_us, optlen); + break; + } + default: + return -EINVAL; + } + + return 0; +} + static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, char *optval, int optlen) { @@ -5382,6 +5437,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, case TCP_USER_TIMEOUT: case TCP_NOTSENT_LOWAT: case TCP_SAVE_SYN: + case TCP_RTO_MAX_MS: if (*optlen != sizeof(int)) return -EINVAL; break; @@ -5391,20 +5447,9 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, if (*optlen < 1) return -EINVAL; break; - case TCP_BPF_SOCK_OPS_CB_FLAGS: - if (*optlen != sizeof(int)) - return -EINVAL; - if (getopt) { - struct tcp_sock *tp = tcp_sk(sk); - int cb_flags = tp->bpf_sock_ops_cb_flags; - - memcpy(optval, &cb_flags, *optlen); - return 0; - } - return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen); default: if (getopt) - return -EINVAL; + return bpf_sol_tcp_getsockopt(sk, optname, optval, *optlen); return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen); } @@ -5500,6 +5545,11 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; } +static bool is_locked_tcp_sock_ops(struct bpf_sock_ops_kern *bpf_sock) +{ + return bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB; +} + static int _bpf_setsockopt(struct sock *sk, int 
level, int optname, char *optval, int optlen) { @@ -5650,6 +5700,9 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = { BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, int, optname, char *, optval, int, optlen) { + if (!is_locked_tcp_sock_ops(bpf_sock)) + return -EOPNOTSUPP; + return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen); } @@ -5735,6 +5788,9 @@ static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock, BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, int, optname, char *, optval, int, optlen) { + if (!is_locked_tcp_sock_ops(bpf_sock)) + return -EOPNOTSUPP; + if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP && optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) { int ret, copy_len = 0; @@ -5777,6 +5833,9 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, struct sock *sk = bpf_sock->sk; int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; + if (!is_locked_tcp_sock_ops(bpf_sock)) + return -EOPNOTSUPP; + if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk)) return -EINVAL; @@ -7586,6 +7645,9 @@ BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, u8 search_kind, search_len, copy_len, magic_len; int ret; + if (!is_locked_tcp_sock_ops(bpf_sock)) + return -EOPNOTSUPP; + /* 2 byte is the minimal option len except TCPOPT_NOP and * TCPOPT_EOL which are useless for the bpf prog to learn * and this helper disallow loading them also. 
@@ -10358,10 +10420,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, } \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, \ - is_fullsock), \ + is_locked_tcp_sock), \ fullsock_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ - is_fullsock)); \ + is_locked_tcp_sock)); \ *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ if (si->dst_reg == si->src_reg) \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ @@ -10446,10 +10508,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, temp)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, \ - is_fullsock), \ + is_locked_tcp_sock), \ reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, \ - is_fullsock)); \ + is_locked_tcp_sock)); \ *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, sk),\ @@ -12062,6 +12124,25 @@ __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff *s, struct sock *sk, #endif } +__bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, + u64 flags) +{ + struct sk_buff *skb; + + if (skops->op != BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) + return -EOPNOTSUPP; + + if (flags) + return -EINVAL; + + skb = skops->skb; + skb_shinfo(skb)->tx_flags |= SKBTX_BPF; + TCP_SKB_CB(skb)->txstamp_ack |= TSTAMP_ACK_BPF; + skb_shinfo(skb)->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; + + return 0; +} + __bpf_kfunc_end_defs(); int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, @@ -12095,6 +12176,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk) BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS) BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk) +BTF_KFUNCS_START(bpf_kfunc_check_set_sock_ops) +BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp, KF_TRUSTED_ARGS) +BTF_KFUNCS_END(bpf_kfunc_check_set_sock_ops) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -12115,6 
+12200,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_tcp_reqsk = { .set = &bpf_kfunc_check_set_tcp_reqsk, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_sock_ops, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -12133,7 +12223,8 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, &bpf_kfunc_set_sock_addr); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops); } late_initcall(bpf_kfunc_init); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9cd8de6bebb5..1b61bb25ba0e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -106,7 +106,7 @@ int flow_dissector_bpf_prog_attach_check(struct net *net, #endif /* CONFIG_BPF_SYSCALL */ /** - * __skb_flow_get_ports - extract the upper layer ports and return them + * skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from * @thoff: transport header offset * @ip_proto: protocol for which to get port offset @@ -116,8 +116,8 @@ int flow_dissector_bpf_prog_attach_check(struct net *net, * The function will try to retrieve the ports at offset thoff + poff where poff * is the protocol port offset returned from proto_ports_offset */ -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - const void *data, int hlen) +__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + const void *data, int hlen) { int poff = proto_ports_offset(ip_proto); @@ -137,7 +137,7 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, 
return 0; } -EXPORT_SYMBOL(__skb_flow_get_ports); +EXPORT_SYMBOL(skb_flow_get_ports); static bool icmp_has_id(u8 type) { @@ -870,7 +870,7 @@ __skb_flow_dissect_ports(const struct sk_buff *skb, if (!key_ports && !key_ports_range) return; - ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); + ports = skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); if (key_ports) key_ports->ports = ports; diff --git a/net/core/gro.c b/net/core/gro.c index 0ad549b07e03..b350e5b69549 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -250,8 +250,7 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) return 0; } - -static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) +static void gro_complete(struct gro_node *gro, struct sk_buff *skb) { struct list_head *head = &net_hotdata.offload_base; struct packet_offload *ptype; @@ -284,43 +283,43 @@ static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) } out: - gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); + gro_normal_one(gro, skb, NAPI_GRO_CB(skb)->count); } -static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, - bool flush_old) +static void __gro_flush_chain(struct gro_node *gro, u32 index, bool flush_old) { - struct list_head *head = &napi->gro_hash[index].list; + struct list_head *head = &gro->hash[index].list; struct sk_buff *skb, *p; list_for_each_entry_safe_reverse(skb, p, head, list) { if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) return; skb_list_del_init(skb); - napi_gro_complete(napi, skb); - napi->gro_hash[index].count--; + gro_complete(gro, skb); + gro->hash[index].count--; } - if (!napi->gro_hash[index].count) - __clear_bit(index, &napi->gro_bitmask); + if (!gro->hash[index].count) + __clear_bit(index, &gro->bitmask); } -/* napi->gro_hash[].list contains packets ordered by age. +/* + * gro->hash[].list contains packets ordered by age. * youngest packets at the head of it. 
* Complete skbs in reverse order to reduce latencies. */ -void napi_gro_flush(struct napi_struct *napi, bool flush_old) +void __gro_flush(struct gro_node *gro, bool flush_old) { - unsigned long bitmask = napi->gro_bitmask; + unsigned long bitmask = gro->bitmask; unsigned int i, base = ~0U; while ((i = ffs(bitmask)) != 0) { bitmask >>= i; base += i; - __napi_gro_flush_chain(napi, base, flush_old); + __gro_flush_chain(gro, base, flush_old); } } -EXPORT_SYMBOL(napi_gro_flush); +EXPORT_SYMBOL(__gro_flush); static unsigned long gro_list_prepare_tc_ext(const struct sk_buff *skb, const struct sk_buff *p, @@ -439,7 +438,7 @@ static void gro_try_pull_from_frag0(struct sk_buff *skb) gro_pull_from_frag0(skb, grow); } -static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) +static void gro_flush_oldest(struct gro_node *gro, struct list_head *head) { struct sk_buff *oldest; @@ -455,14 +454,15 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) * SKB to the chain. 
*/ skb_list_del_init(oldest); - napi_gro_complete(napi, oldest); + gro_complete(gro, oldest); } -static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +static enum gro_result dev_gro_receive(struct gro_node *gro, + struct sk_buff *skb) { u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); - struct gro_list *gro_list = &napi->gro_hash[bucket]; struct list_head *head = &net_hotdata.offload_base; + struct gro_list *gro_list = &gro->hash[bucket]; struct packet_offload *ptype; __be16 type = skb->protocol; struct sk_buff *pp = NULL; @@ -526,7 +526,7 @@ found_ptype: if (pp) { skb_list_del_init(pp); - napi_gro_complete(napi, pp); + gro_complete(gro, pp); gro_list->count--; } @@ -537,7 +537,7 @@ found_ptype: goto normal; if (unlikely(gro_list->count >= MAX_GRO_SKBS)) - gro_flush_oldest(napi, &gro_list->list); + gro_flush_oldest(gro, &gro_list->list); else gro_list->count++; @@ -551,10 +551,10 @@ found_ptype: ret = GRO_HELD; ok: if (gro_list->count) { - if (!test_bit(bucket, &napi->gro_bitmask)) - __set_bit(bucket, &napi->gro_bitmask); - } else if (test_bit(bucket, &napi->gro_bitmask)) { - __clear_bit(bucket, &napi->gro_bitmask); + if (!test_bit(bucket, &gro->bitmask)) + __set_bit(bucket, &gro->bitmask); + } else if (test_bit(bucket, &gro->bitmask)) { + __clear_bit(bucket, &gro->bitmask); } return ret; @@ -593,13 +593,12 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } EXPORT_SYMBOL(gro_find_complete_by_type); -static gro_result_t napi_skb_finish(struct napi_struct *napi, - struct sk_buff *skb, - gro_result_t ret) +static gro_result_t gro_skb_finish(struct gro_node *gro, struct sk_buff *skb, + gro_result_t ret) { switch (ret) { case GRO_NORMAL: - gro_normal_one(napi, skb, 1); + gro_normal_one(gro, skb, 1); break; case GRO_MERGED_FREE: @@ -620,21 +619,21 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi, return ret; } -gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 
+gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb) { gro_result_t ret; - skb_mark_napi_id(skb, napi); + __skb_mark_napi_id(skb, gro); trace_napi_gro_receive_entry(skb); skb_gro_reset_offset(skb, 0); - ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); + ret = gro_skb_finish(gro, skb, dev_gro_receive(gro, skb)); trace_napi_gro_receive_exit(ret); return ret; } -EXPORT_SYMBOL(napi_gro_receive); +EXPORT_SYMBOL(gro_receive_skb); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { @@ -691,7 +690,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_NORMAL) - gro_normal_one(napi, skb, 1); + gro_normal_one(&napi->gro, skb, 1); break; case GRO_MERGED_FREE: @@ -760,7 +759,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) trace_napi_gro_frags_entry(skb); - ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); + ret = napi_frags_finish(napi, skb, dev_gro_receive(&napi->gro, skb)); trace_napi_gro_frags_exit(ret); return ret; @@ -792,3 +791,37 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb) return sum; } EXPORT_SYMBOL(__skb_gro_checksum_complete); + +void gro_init(struct gro_node *gro) +{ + for (u32 i = 0; i < GRO_HASH_BUCKETS; i++) { + INIT_LIST_HEAD(&gro->hash[i].list); + gro->hash[i].count = 0; + } + + gro->bitmask = 0; + gro->cached_napi_id = 0; + + INIT_LIST_HEAD(&gro->rx_list); + gro->rx_count = 0; +} + +void gro_cleanup(struct gro_node *gro) +{ + struct sk_buff *skb, *n; + + for (u32 i = 0; i < GRO_HASH_BUCKETS; i++) { + list_for_each_entry_safe(skb, n, &gro->hash[i].list, list) + kfree_skb(skb); + + gro->hash[i].count = 0; + } + + gro->bitmask = 0; + gro->cached_napi_id = 0; + + list_for_each_entry_safe(skb, n, &gro->rx_list, list) + kfree_skb(skb); + + gro->rx_count = 0; +} diff --git a/net/core/hotdata.c b/net/core/hotdata.c index d0aaaaa556f2..0bc893d5f07b 100644 --- 
a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -7,7 +7,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base), - .ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all), .gro_normal_batch = 8, .netdev_budget = 300, diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 4417a18b3e95..e39a459540ec 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -149,7 +149,8 @@ int lwtunnel_build_state(struct net *net, u16 encap_type, } EXPORT_SYMBOL_GPL(lwtunnel_build_state); -int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) +int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack, + bool rtnl_is_held) { const struct lwtunnel_encap_ops *ops; int ret = -EINVAL; @@ -160,21 +161,19 @@ int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) return ret; } - rcu_read_lock(); - ops = rcu_dereference(lwtun_encaps[encap_type]); - rcu_read_unlock(); + ops = rcu_access_pointer(lwtun_encaps[encap_type]); #ifdef CONFIG_MODULES if (!ops) { const char *encap_type_str = lwtunnel_encap_str(encap_type); if (encap_type_str) { - __rtnl_unlock(); + if (rtnl_is_held) + __rtnl_unlock(); request_module("rtnl-lwt-%s", encap_type_str); - rtnl_lock(); + if (rtnl_is_held) + rtnl_lock(); - rcu_read_lock(); - ops = rcu_dereference(lwtun_encaps[encap_type]); - rcu_read_unlock(); + ops = rcu_access_pointer(lwtun_encaps[encap_type]); } } #endif @@ -187,7 +186,8 @@ int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + bool rtnl_is_held) { struct rtnexthop *rtnh = (struct rtnexthop *)attr; struct nlattr *nla_entype; @@ -209,7 +209,8 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, encap_type = nla_get_u16(nla_entype); if 
(lwtunnel_valid_encap_type(encap_type, - extack) != 0) + extack, + rtnl_is_held) != 0) return -EOPNOTSUPP; } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1a620f903c56..0738aa6cca25 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -518,7 +518,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) if (!ret) return NULL; - hash_heads = kvzalloc(size, GFP_ATOMIC); + hash_heads = kzalloc(size, GFP_ATOMIC); if (!hash_heads) { kfree(ret); return NULL; @@ -536,7 +536,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table, rcu); - kvfree(nht->hash_heads); + kfree(nht->hash_heads); kfree(nht); } @@ -832,12 +832,10 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, return -ENOENT; } -static void neigh_parms_destroy(struct neigh_parms *parms); - static inline void neigh_parms_put(struct neigh_parms *parms) { if (refcount_dec_and_test(&parms->refcnt)) - neigh_parms_destroy(parms); + kfree(parms); } /* @@ -1713,11 +1711,6 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) } EXPORT_SYMBOL(neigh_parms_release); -static void neigh_parms_destroy(struct neigh_parms *parms) -{ - kfree(parms); -} - static struct lock_class_key neigh_table_proxy_queue_class; static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index fa6d3969734a..3e92bf0f9060 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -185,7 +185,13 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos) } } - list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) { + list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) { + if (i == pos) + return pt; + ++i; + } + + list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_specific, list) { if (i == pos) return pt; ++i; @@ -210,6 +216,7 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) static void *ptype_seq_next(struct 
seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq_file_net(seq); struct net_device *dev; struct packet_type *pt; struct list_head *nxt; @@ -232,15 +239,22 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) goto found; } } - - nxt = net_hotdata.ptype_all.next; - goto ptype_all; + nxt = net->ptype_all.next; + goto net_ptype_all; } - if (pt->type == htons(ETH_P_ALL)) { -ptype_all: - if (nxt != &net_hotdata.ptype_all) + if (pt->af_packet_net) { +net_ptype_all: + if (nxt != &net->ptype_all && nxt != &net->ptype_specific) goto found; + + if (nxt == &net->ptype_all) { + /* continue with ->ptype_specific if it's not empty */ + nxt = net->ptype_specific.next; + if (nxt != &net->ptype_specific) + goto found; + } + hash = 0; nxt = ptype_base[0].next; } else diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 07cb99b114bd..1ace0cd01adc 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -23,6 +23,7 @@ #include <linux/of.h> #include <linux/of_net.h> #include <linux/cpu.h> +#include <net/netdev_lock.h> #include <net/netdev_rx_queue.h> #include <net/rps.h> @@ -42,6 +43,87 @@ static inline int dev_isalive(const struct net_device *dev) return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; } +/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active, + * when unregistering a net device and accessing associated sysfs files. The + * potential deadlock is as follow: + * + * CPU 0 CPU 1 + * + * rtnl_lock vfs_read + * unregister_netdevice_many kernfs_seq_start + * device_del / kobject_put kernfs_get_active (kn->active++) + * kernfs_drain sysfs_kf_seq_show + * wait_event( rtnl_lock + * kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release + * -> waits on CPU 1 to decrease kn->active the rtnl lock. + * + * The historical fix was to use rtnl_trylock with restart_syscall to bail out + * of sysfs operations when the lock couldn't be taken. 
This fixed the above + * issue as it allowed CPU 1 to bail out of the ABBA situation. + * + * But it came with performances issues, as syscalls are being restarted in + * loops when there was contention on the rtnl lock, with huge slow downs in + * specific scenarios (e.g. lots of virtual interfaces created and userspace + * daemons querying their attributes). + * + * The idea below is to bail out of the active kernfs_node protection + * (kn->active) while trying to take the rtnl lock. + * + * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The + * net device is guaranteed to be alive if this returns successfully. + */ +static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr, + struct net_device *ndev) +{ + struct kernfs_node *kn; + int ret = 0; + + /* First, we hold a reference to the net device as the unregistration + * path might run in parallel. This will ensure the net device and the + * associated sysfs objects won't be freed while we try to take the rtnl + * lock. + */ + dev_hold(ndev); + /* sysfs_break_active_protection was introduced to allow self-removal of + * devices and their associated sysfs files by bailing out of the + * sysfs/kernfs protection. We do this here to allow the unregistration + * path to complete in parallel. The following takes a reference on the + * kobject and the kernfs_node being accessed. + * + * This works because we hold a reference onto the net device and the + * unregistration path will wait for us eventually in netdev_run_todo + * (outside an rtnl lock section). + */ + kn = sysfs_break_active_protection(kobj, attr); + /* We can now try to take the rtnl lock. This can't deadlock us as the + * unregistration path is able to drain sysfs files (kernfs_node) thanks + * to the above dance. + */ + if (rtnl_lock_interruptible()) { + ret = -ERESTARTSYS; + goto unbreak; + } + /* Check dismantle on the device hasn't started, otherwise deny the + * operation. 
+ */ + if (!dev_isalive(ndev)) { + rtnl_unlock(); + ret = -ENODEV; + goto unbreak; + } + /* We are now sure the device dismantle hasn't started nor that it can + * start before we exit the locking section as we hold the rtnl lock. + * There's no need to keep unbreaking the sysfs protection nor to hold + * a net device reference from that point; that was only needed to take + * the rtnl lock. + */ +unbreak: + sysfs_unbreak_active_protection(kn); + dev_put(ndev); + + return ret; +} + /* use same locking rules as GIF* ioctl's */ static ssize_t netdev_show(const struct device *dev, struct device_attribute *attr, char *buf, @@ -95,14 +177,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, if (ret) goto err; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + goto err; + + ret = (*set)(netdev, new); + if (ret == 0) + ret = len; - if (dev_isalive(netdev)) { - ret = (*set)(netdev, new); - if (ret == 0) - ret = len; - } rtnl_unlock(); err: return ret; @@ -220,7 +302,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, struct net_device *netdev = to_net_dev(dev); /* The check is also done in change_carrier; this helps returning early - * without hitting the trylock/restart in netdev_store. + * without hitting the locking section in netdev_store. */ if (!netdev->netdev_ops->ndo_change_carrier) return -EOPNOTSUPP; @@ -232,11 +314,13 @@ static ssize_t carrier_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); - int ret = -EINVAL; + int ret; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; + ret = -EINVAL; if (netif_running(netdev)) { /* Synchronize carrier state with link watch, * see also rtnl_getlink(). 
@@ -245,8 +329,8 @@ static ssize_t carrier_show(struct device *dev, ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev)); } - rtnl_unlock(); + rtnl_unlock(); return ret; } static DEVICE_ATTR_RW(carrier); @@ -258,14 +342,16 @@ static ssize_t speed_show(struct device *dev, int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->ethtool_ops->get_link_ksettings) return ret; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; + ret = -EINVAL; if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; @@ -284,14 +370,16 @@ static ssize_t duplex_show(struct device *dev, int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. 
*/ if (!netdev->ethtool_ops->get_link_ksettings) return ret; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; + ret = -EINVAL; if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; @@ -481,7 +569,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, struct net_device *netdev = to_net_dev(dev); struct net *net = dev_net(netdev); size_t count = len; - ssize_t ret = 0; + ssize_t ret; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -490,16 +578,15 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, if (len > 0 && buf[len - 1] == '\n') --count; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - ret = dev_set_alias(netdev, buf, count); - if (ret < 0) - goto err; - ret = len; - netdev_state_change(netdev); - } + ret = dev_set_alias(netdev, buf, count); + if (ret < 0) + goto err; + ret = len; + netdev_state_change(netdev); err: rtnl_unlock(); @@ -511,7 +598,7 @@ static ssize_t ifalias_show(struct device *dev, { const struct net_device *netdev = to_net_dev(dev); char tmp[IFALIASZ]; - ssize_t ret = 0; + ssize_t ret; ret = dev_get_alias(netdev, tmp, sizeof(tmp)); if (ret > 0) @@ -551,24 +638,23 @@ static ssize_t phys_port_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); - ssize_t ret = -EINVAL; + struct netdev_phys_item_id ppid; + ssize_t ret; /* The check is also done in dev_get_phys_port_id; this helps returning - * early without hitting the trylock/restart below. + * early without hitting the locking section below. 
*/ if (!netdev->netdev_ops->ndo_get_phys_port_id) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid; + ret = dev_get_phys_port_id(netdev, &ppid); + if (!ret) + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - ret = dev_get_phys_port_id(netdev, &ppid); - if (!ret) - ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - } rtnl_unlock(); return ret; @@ -579,25 +665,24 @@ static ssize_t phys_port_name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); - ssize_t ret = -EINVAL; + char name[IFNAMSIZ]; + ssize_t ret; /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->netdev_ops->ndo_get_phys_port_name && !netdev->devlink_port) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - char name[IFNAMSIZ]; + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sysfs_emit(buf, "%s\n", name); - ret = dev_get_phys_port_name(netdev, name, sizeof(name)); - if (!ret) - ret = sysfs_emit(buf, "%s\n", name); - } rtnl_unlock(); return ret; @@ -608,26 +693,25 @@ static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); - ssize_t ret = -EINVAL; + struct netdev_phys_item_id ppid = { }; + ssize_t ret; /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the trylock/restart below. This works + * returning early without hitting the locking section below. 
This works * because recurse is false when calling dev_get_port_parent_id. */ if (!netdev->netdev_ops->ndo_get_port_parent_id && !netdev->devlink_port) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid = { }; + ret = dev_get_port_parent_id(netdev, &ppid, false); + if (!ret) + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - ret = dev_get_port_parent_id(netdev, &ppid, false); - if (!ret) - ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - } rtnl_unlock(); return ret; @@ -972,7 +1056,7 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, rcu_read_lock(); flow_table = rcu_dereference(queue->rps_flow_table); if (flow_table) - val = (unsigned long)flow_table->mask + 1; + val = 1UL << flow_table->log; rcu_read_unlock(); return sysfs_emit(buf, "%lu\n", val); @@ -1025,7 +1109,7 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, if (!table) return -ENOMEM; - table->mask = mask; + table->log = ilog2(mask) + 1; for (count = 0; count <= mask; count++) table->flows[count].cpu = RPS_NO_CPU; } else { @@ -1108,7 +1192,6 @@ static void rx_queue_get_ownership(const struct kobject *kobj, static const struct kobj_type rx_queue_ktype = { .sysfs_ops = &rx_queue_sysfs_ops, .release = rx_queue_release, - .default_groups = rx_queue_default_groups, .namespace = rx_queue_namespace, .get_ownership = rx_queue_get_ownership, }; @@ -1131,6 +1214,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Rx queues are cleared in rx_queue_release to allow later + * re-registration. This is triggered when their kobj refcount is + * dropped. 
+ * + * If a queue is removed while both a read (or write) operation and + * the re-addition of the same queue are pending (waiting on rtnl_lock) + * it might happen that the re-addition will execute before the read, + * so that the initial removal never happens (queue's kobj refcount + * won't drop enough because of the pending read). In such a rare case, + * return to allow the removal operation to complete. + */ + if (unlikely(kobj->state_initialized)) { + netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed"); + return -EAGAIN; + } + /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ @@ -1142,20 +1241,27 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) if (error) goto err; + queue->groups = rx_queue_default_groups; + error = sysfs_create_groups(kobj, queue->groups); + if (error) + goto err; + if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) - goto err; + goto err_default_groups; } error = rx_queue_default_mask(dev, queue); if (error) - goto err; + goto err_default_groups; kobject_uevent(kobj, KOBJ_ADD); return error; +err_default_groups: + sysfs_remove_groups(kobj, queue->groups); err: kobject_put(kobj); return error; @@ -1200,12 +1306,14 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) } while (--i >= new_num) { - struct kobject *kobj = &dev->_rx[i].kobj; + struct netdev_rx_queue *queue = &dev->_rx[i]; + struct kobject *kobj = &queue->kobj; if (!refcount_read(&dev_net(dev)->ns.count)) kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); + sysfs_remove_groups(kobj, queue->groups); kobject_put(kobj); } @@ -1244,9 +1352,11 @@ static int net_rx_queue_change_owner(struct net_device *dev, int num, */ struct netdev_queue_attribute { struct attribute attr; - ssize_t (*show)(struct netdev_queue *queue, char *buf);
- ssize_t (*store)(struct netdev_queue *queue, - const char *buf, size_t len); + ssize_t (*show)(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf); + ssize_t (*store)(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len); }; #define to_netdev_queue_attr(_attr) \ container_of(_attr, struct netdev_queue_attribute, attr) @@ -1263,7 +1373,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj, if (!attribute->show) return -EIO; - return attribute->show(queue, buf); + return attribute->show(kobj, attr, queue, buf); } static ssize_t netdev_queue_attr_store(struct kobject *kobj, @@ -1277,7 +1387,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj, if (!attribute->store) return -EIO; - return attribute->store(queue, buf, count); + return attribute->store(kobj, attr, queue, buf, count); } static const struct sysfs_ops netdev_queue_sysfs_ops = { @@ -1285,7 +1395,8 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = { .store = netdev_queue_attr_store, }; -static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) +static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout); @@ -1303,18 +1414,18 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue) return i; } -static ssize_t traffic_class_show(struct netdev_queue *queue, - char *buf) +static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; - int num_tc, tc; - int index; + int num_tc, tc, index, ret; if (!netif_is_multiqueue(dev)) return -ENOENT; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, queue->dev); + if (ret) + return ret; index = get_netdev_queue_index(queue); @@ -1341,24 +1452,25 @@ static ssize_t 
traffic_class_show(struct netdev_queue *queue, } #ifdef CONFIG_XPS -static ssize_t tx_maxrate_show(struct netdev_queue *queue, - char *buf) +static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { return sysfs_emit(buf, "%lu\n", queue->tx_maxrate); } -static ssize_t tx_maxrate_store(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { - struct net_device *dev = queue->dev; int err, index = get_netdev_queue_index(queue); + struct net_device *dev = queue->dev; u32 rate = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; /* The check is also done later; this helps returning early without - * hitting the trylock/restart below. + * hitting the locking section below. */ if (!dev->netdev_ops->ndo_set_tx_maxrate) return -EOPNOTSUPP; @@ -1367,18 +1479,23 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue, if (err < 0) return err; - if (!rtnl_trylock()) - return restart_syscall(); + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) + return err; err = -EOPNOTSUPP; + netdev_lock_ops(dev); if (dev->netdev_ops->ndo_set_tx_maxrate) err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); + netdev_unlock_ops(dev); - rtnl_unlock(); if (!err) { queue->tx_maxrate = rate; + rtnl_unlock(); return len; } + + rtnl_unlock(); return err; } @@ -1422,16 +1539,17 @@ static ssize_t bql_set(const char *buf, const size_t count, return count; } -static ssize_t bql_show_hold_time(struct netdev_queue *queue, - char *buf) +static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); } -static ssize_t bql_set_hold_time(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_hold_time(struct 
kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct dql *dql = &queue->dql; unsigned int value; @@ -1450,15 +1568,17 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init = __ATTR(hold_time, 0644, bql_show_hold_time, bql_set_hold_time); -static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs)); } -static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct dql *dql = &queue->dql; unsigned int value; @@ -1484,13 +1604,15 @@ static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init = __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs); -static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max)); } -static ssize_t bql_set_stall_max(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { WRITE_ONCE(queue->dql.stall_max, 0); return len; @@ -1499,7 +1621,8 @@ static ssize_t bql_set_stall_max(struct netdev_queue *queue, static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init = __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max); -static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) +static 
ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; @@ -1509,8 +1632,8 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init = __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL); -static ssize_t bql_show_inflight(struct netdev_queue *queue, - char *buf) +static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; @@ -1521,13 +1644,16 @@ static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init = __ATTR(inflight, 0444, bql_show_inflight, NULL); #define BQL_ATTR(NAME, FIELD) \ -static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ - char *buf) \ +static ssize_t bql_show_ ## NAME(struct kobject *kobj, \ + struct attribute *attr, \ + struct netdev_queue *queue, char *buf) \ { \ return bql_show(buf, queue->dql.FIELD); \ } \ \ -static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ +static ssize_t bql_set_ ## NAME(struct kobject *kobj, \ + struct attribute *attr, \ + struct netdev_queue *queue, \ const char *buf, size_t len) \ { \ return bql_set(buf, len, &queue->dql.FIELD); \ @@ -1613,19 +1739,21 @@ out_no_maps: return len < PAGE_SIZE ? len : -EINVAL; } -static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) +static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; - int len, tc; + int len, tc, ret; if (!netif_is_multiqueue(dev)) return -ENOENT; index = get_netdev_queue_index(queue); - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, queue->dev); + if (ret) + return ret; /* If queue belongs to subordinate dev use its map */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? 
: dev; @@ -1636,18 +1764,21 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) return -EINVAL; } - /* Make sure the subordinate device can't be freed */ - get_device(&dev->dev); + /* Increase the net device refcnt to make sure it won't be freed while + * xps_queue_show is running. + */ + dev_hold(dev); rtnl_unlock(); len = xps_queue_show(dev, index, tc, buf, XPS_CPUS); - put_device(&dev->dev); + dev_put(dev); return len; } -static ssize_t xps_cpus_store(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct net_device *dev = queue->dev; unsigned int index; @@ -1671,9 +1802,10 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, return err; } - if (!rtnl_trylock()) { + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) { free_cpumask_var(mask); - return restart_syscall(); + return err; } err = netif_set_xps_queue(dev, mask, index); @@ -1687,26 +1819,34 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init = __ATTR_RW(xps_cpus); -static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) +static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; - int tc; + int tc, ret; index = get_netdev_queue_index(queue); - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, dev); + if (ret) + return ret; tc = netdev_txq_to_tc(dev, index); + + /* Increase the net device refcnt to make sure it won't be freed while + * xps_queue_show is running. + */ + dev_hold(dev); rtnl_unlock(); - if (tc < 0) - return -EINVAL; - return xps_queue_show(dev, index, tc, buf, XPS_RXQS); + ret = tc >= 0 ? 
xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL; + dev_put(dev); + return ret; } -static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, +static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, size_t len) { struct net_device *dev = queue->dev; @@ -1730,9 +1870,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, return err; } - if (!rtnl_trylock()) { + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) { bitmap_free(mask); - return restart_syscall(); + return err; } cpus_read_lock(); @@ -1792,7 +1933,6 @@ static void netdev_queue_get_ownership(const struct kobject *kobj, static const struct kobj_type netdev_queue_ktype = { .sysfs_ops = &netdev_queue_sysfs_ops, .release = netdev_queue_release, - .default_groups = netdev_queue_default_groups, .namespace = netdev_queue_namespace, .get_ownership = netdev_queue_get_ownership, }; @@ -1811,6 +1951,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Tx queues are cleared in netdev_queue_release to allow later + * re-registration. This is triggered when their kobj refcount is + * dropped. + * + * If a queue is removed while both a read (or write) operation and + * the re-addition of the same queue are pending (waiting on rtnl_lock) + * it might happen that the re-addition will execute before the read, + * so that the initial removal never happens (queue's kobj refcount + * won't drop enough because of the pending read). In such a rare case, + * return to allow the removal operation to complete.
+ */ + if (unlikely(kobj->state_initialized)) { + netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed"); + return -EAGAIN; + } + /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ @@ -1822,15 +1978,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) goto err; + queue->groups = netdev_queue_default_groups; + error = sysfs_create_groups(kobj, queue->groups); + if (error) + goto err; + if (netdev_uses_bql(dev)) { error = sysfs_create_group(kobj, &dql_group); if (error) - goto err; + goto err_default_groups; } kobject_uevent(kobj, KOBJ_ADD); return 0; +err_default_groups: + sysfs_remove_groups(kobj, queue->groups); err: kobject_put(kobj); return error; @@ -1885,6 +2048,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) if (netdev_uses_bql(dev)) sysfs_remove_group(&queue->kobj, &dql_group); + sysfs_remove_groups(&queue->kobj, queue->groups); kobject_put(&queue->kobj); } @@ -1984,8 +2148,10 @@ static void remove_queue_kobjects(struct net_device *dev) net_rx_queue_update_kobjects(dev, real_rx, 0); netdev_queue_update_kobjects(dev, real_tx, 0); + netdev_lock_ops(dev); dev->real_num_rx_queues = 0; dev->real_num_tx_queues = 0; + netdev_unlock_ops(dev); #ifdef CONFIG_SYSFS kset_unregister(dev->queues_kset); #endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 4303f2a49262..b0dfdf791ece 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -340,6 +340,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL); #endif + INIT_LIST_HEAD(&net->ptype_all); + INIT_LIST_HEAD(&net->ptype_specific); preinit_net_sysctl(net); } diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 996ac6a449eb..739f7b6506a6 100644 --- a/net/core/netdev-genl-gen.c +++ 
b/net/core/netdev-genl-gen.c @@ -9,7 +9,7 @@ #include "netdev-genl-gen.h" #include <uapi/linux/netdev.h> -#include <linux/list.h> +#include <net/netdev_netlink.h> /* Integer value ranges */ static const struct netlink_range_validation netdev_a_page_pool_id_range = { @@ -217,7 +217,7 @@ struct genl_family netdev_nl_family __ro_after_init = { .n_split_ops = ARRAY_SIZE(netdev_nl_ops), .mcgrps = netdev_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps), - .sock_priv_size = sizeof(struct list_head), + .sock_priv_size = sizeof(struct netdev_nl_sock), .sock_priv_init = __netdev_nl_sock_priv_init, .sock_priv_destroy = __netdev_nl_sock_priv_destroy, }; diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index e09dd7539ff2..17d39fd64c94 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -10,7 +10,7 @@ #include <net/genetlink.h> #include <uapi/linux/netdev.h> -#include <linux/list.h> +#include <net/netdev_netlink.h> /* Common nested types */ extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; @@ -42,7 +42,7 @@ enum { extern struct genl_family netdev_nl_family; -void netdev_nl_sock_priv_init(struct list_head *priv); -void netdev_nl_sock_priv_destroy(struct list_head *priv); +void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv); +void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv); #endif /* _LINUX_NETDEV_GEN_H */ diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 715f85c6b62e..fd1cfa9707dc 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -10,6 +10,7 @@ #include <net/sock.h> #include <net/xdp.h> #include <net/xdp_sock.h> +#include <net/page_pool/memory_provider.h> #include "dev.h" #include "devmem.h" @@ -52,6 +53,8 @@ XDP_METADATA_KFUNC_xxx xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; + if 
(netdev->xsk_tx_metadata_ops->tmo_request_launch_time) + xsk_features |= NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO; } if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || @@ -266,7 +269,7 @@ netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, prev_id = UINT_MAX; list_for_each_entry(napi, &netdev->napi_list, dev_list) { - if (napi->napi_id < MIN_NAPI_ID) + if (!napi_id_valid(napi->napi_id)) continue; /* Dump continuation below depends on the list being sorted */ @@ -364,11 +367,18 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) return err; } +static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi) +{ + if (napi && napi_id_valid(napi->napi_id)) + return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id); + return 0; +} + static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { - struct net_devmem_dmabuf_binding *binding; + struct pp_memory_provider_params *params; struct netdev_rx_queue *rxq; struct netdev_queue *txq; void *hdr; @@ -385,21 +395,30 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, switch (q_type) { case NETDEV_QUEUE_TYPE_RX: rxq = __netif_get_rx_queue(netdev, q_idx); - if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, - rxq->napi->napi_id)) + if (nla_put_napi_id(rsp, rxq->napi)) goto nla_put_failure; - binding = rxq->mp_params.mp_priv; - if (binding && - nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id)) + params = &rxq->mp_params; + if (params->mp_ops && + params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) goto nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); - if (txq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, - txq->napi->napi_id)) + if (nla_put_napi_id(rsp, txq->napi)) goto 
nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (txq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif + break; } genlmsg_end(rsp, hdr); @@ -576,6 +595,7 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) || @@ -809,8 +829,8 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; struct net_devmem_dmabuf_binding *binding; - struct list_head *sock_binding_list; u32 ifindex, dmabuf_fd, rxq_idx; + struct netdev_nl_sock *priv; struct net_device *netdev; struct sk_buff *rsp; struct nlattr *attr; @@ -825,10 +845,9 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); - sock_binding_list = genl_sk_priv_get(&netdev_nl_family, - NETLINK_CB(skb).sk); - if (IS_ERR(sock_binding_list)) - return PTR_ERR(sock_binding_list); + priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); + if (IS_ERR(priv)) + return PTR_ERR(priv); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) @@ -840,11 +859,18 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_genlmsg_free; } - rtnl_lock(); + mutex_lock(&priv->lock); - netdev = __dev_get_by_index(genl_info_net(info), ifindex); + netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); if (!netdev 
|| !netif_device_present(netdev)) { err = -ENODEV; + goto err_unlock_sock; + } + + if (!netdev_need_ops_lock(netdev)) { + err = -EOPNOTSUPP; + NL_SET_BAD_ATTR(info->extack, + info->attrs[NETDEV_A_DEV_IFINDEX]); goto err_unlock; } @@ -889,7 +915,7 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unbind; } - list_add(&binding->list, sock_binding_list); + list_add(&binding->list, &priv->bindings); nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); genlmsg_end(rsp, hdr); @@ -898,34 +924,41 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) if (err) goto err_unbind; - rtnl_unlock(); + netdev_unlock(netdev); + + mutex_unlock(&priv->lock); return 0; err_unbind: net_devmem_unbind_dmabuf(binding); err_unlock: - rtnl_unlock(); + netdev_unlock(netdev); +err_unlock_sock: + mutex_unlock(&priv->lock); err_genlmsg_free: nlmsg_free(rsp); return err; } -void netdev_nl_sock_priv_init(struct list_head *priv) +void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { - INIT_LIST_HEAD(priv); + INIT_LIST_HEAD(&priv->bindings); + mutex_init(&priv->lock); } -void netdev_nl_sock_priv_destroy(struct list_head *priv) +void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) { struct net_devmem_dmabuf_binding *binding; struct net_devmem_dmabuf_binding *temp; - list_for_each_entry_safe(binding, temp, priv, list) { - rtnl_lock(); + mutex_lock(&priv->lock); + list_for_each_entry_safe(binding, temp, &priv->bindings, list) { + netdev_lock(binding->dev); net_devmem_unbind_dmabuf(binding); - rtnl_unlock(); + netdev_unlock(binding->dev); } + mutex_unlock(&priv->lock); } static int netdev_genl_netdevice_event(struct notifier_block *nb, diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index db82786fa0c4..3af716f77a13 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -1,36 +1,37 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/netdevice.h> +#include <net/netdev_lock.h> #include 
<net/netdev_queues.h> #include <net/netdev_rx_queue.h> +#include <net/page_pool/memory_provider.h> #include "page_pool_priv.h" int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) { struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx); + const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops; void *new_mem, *old_mem; int err; - if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop || - !dev->queue_mgmt_ops->ndo_queue_mem_free || - !dev->queue_mgmt_ops->ndo_queue_mem_alloc || - !dev->queue_mgmt_ops->ndo_queue_start) + if (!qops || !qops->ndo_queue_stop || !qops->ndo_queue_mem_free || + !qops->ndo_queue_mem_alloc || !qops->ndo_queue_start) return -EOPNOTSUPP; - ASSERT_RTNL(); + netdev_assert_locked(dev); - new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL); + new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL); if (!new_mem) return -ENOMEM; - old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL); + old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL); if (!old_mem) { err = -ENOMEM; goto err_free_new_mem; } - err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx); + err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx); if (err) goto err_free_old_mem; @@ -38,15 +39,19 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) if (err) goto err_free_new_queue_mem; - err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx); - if (err) - goto err_free_new_queue_mem; + if (netif_running(dev)) { + err = qops->ndo_queue_stop(dev, old_mem, rxq_idx); + if (err) + goto err_free_new_queue_mem; - err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx); - if (err) - goto err_start_queue; + err = qops->ndo_queue_start(dev, new_mem, rxq_idx); + if (err) + goto err_start_queue; + } else { + swap(new_mem, old_mem); + } - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem); + qops->ndo_queue_mem_free(dev, old_mem); kvfree(old_mem); 
kvfree(new_mem); @@ -61,15 +66,15 @@ err_start_queue: * WARN if we fail to recover the old rx queue, and at least free * old_mem so we don't also leak that. */ - if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) { + if (qops->ndo_queue_start(dev, old_mem, rxq_idx)) { WARN(1, "Failed to restart old queue in error path. RX queue %d may be unhealthy.", rxq_idx); - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem); + qops->ndo_queue_mem_free(dev, old_mem); } err_free_new_queue_mem: - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem); + qops->ndo_queue_mem_free(dev, new_mem); err_free_old_mem: kvfree(old_mem); @@ -80,3 +85,74 @@ err_free_new_mem: return err; } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); + +static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *p) +{ + struct netdev_rx_queue *rxq; + int ret; + + if (!netdev_need_ops_lock(dev)) + return -EOPNOTSUPP; + + if (ifq_idx >= dev->real_num_rx_queues) + return -EINVAL; + ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues); + + rxq = __netif_get_rx_queue(dev, ifq_idx); + if (rxq->mp_params.mp_ops) + return -EEXIST; + + rxq->mp_params = *p; + ret = netdev_rx_queue_restart(dev, ifq_idx); + if (ret) { + rxq->mp_params.mp_ops = NULL; + rxq->mp_params.mp_priv = NULL; + } + return ret; +} + +int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *p) +{ + int ret; + + netdev_lock(dev); + ret = __net_mp_open_rxq(dev, ifq_idx, p); + netdev_unlock(dev); + return ret; +} + +static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *old_p) +{ + struct netdev_rx_queue *rxq; + + if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) + return; + + rxq = __netif_get_rx_queue(dev, ifq_idx); + + /* Callers holding a netdev ref may get here after we already + * went thru shutdown via dev_memory_provider_uninstall(). 
+ */ + if (dev->reg_state > NETREG_REGISTERED && + !rxq->mp_params.mp_ops) + return; + + if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops || + rxq->mp_params.mp_priv != old_p->mp_priv)) + return; + + rxq->mp_params.mp_ops = NULL; + rxq->mp_params.mp_priv = NULL; + WARN_ON(netdev_rx_queue_restart(dev, ifq_idx)); +} + +void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *old_p) +{ + netdev_lock(dev); + __net_mp_close_rxq(dev, ifq_idx, old_p); + netdev_unlock(dev); +} diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 0ab722d95a2d..4ddb7490df4b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -284,12 +284,13 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) struct sk_buff *skb; zap_completion_queue(); - refill_skbs(np); repeat: skb = alloc_skb(len, GFP_ATOMIC); - if (!skb) + if (!skb) { skb = skb_dequeue(&np->skb_pool); + schedule_work(&np->refill_wq); + } if (!skb) { if (++count < 10) { @@ -432,7 +433,6 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->len = htons(udp_len); if (np->ipv6) { - udph->check = 0; udph->check = csum_ipv6_magic(&np->local_ip.in6, &np->remote_ip.in6, udp_len, IPPROTO_UDP, @@ -506,7 +506,8 @@ void netpoll_print_options(struct netpoll *np) np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); else np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); - np_info(np, "interface '%s'\n", np->dev_name); + np_info(np, "interface name '%s'\n", np->dev_name); + np_info(np, "local ethernet address '%pM'\n", np->dev_mac); np_info(np, "remote port %d\n", np->remote_port); if (np->ipv6) np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); @@ -540,6 +541,7 @@ static void skb_pool_flush(struct netpoll *np) { struct sk_buff_head *skb_pool; + cancel_work_sync(&np->refill_wq); skb_pool = &np->skb_pool; skb_queue_purge_reason(skb_pool, SKB_CONSUMED); } @@ -575,11 +577,18 @@ int netpoll_parse_options(struct 
netpoll *np, char *opt) cur++; if (*cur != ',') { - /* parse out dev name */ + /* parse out dev_name or dev_mac */ if ((delim = strchr(cur, ',')) == NULL) goto parse_failed; *delim = 0; - strscpy(np->dev_name, cur, sizeof(np->dev_name)); + + np->dev_name[0] = '\0'; + eth_broadcast_addr(np->dev_mac); + if (!strchr(cur, ':')) + strscpy(np->dev_name, cur, sizeof(np->dev_name)); + else if (!mac_pton(cur, np->dev_mac)) + goto parse_failed; + cur = delim; } cur++; @@ -626,6 +635,14 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } EXPORT_SYMBOL(netpoll_parse_options); +static void refill_skbs_work_handler(struct work_struct *work) +{ + struct netpoll *np = + container_of(work, struct netpoll, refill_wq); + + refill_skbs(np); +} + int __netpoll_setup(struct netpoll *np, struct net_device *ndev) { struct netpoll_info *npinfo; @@ -671,6 +688,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) /* fill up the skb queue */ refill_skbs(np); + INIT_WORK(&np->refill_wq, refill_skbs_work_handler); /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); @@ -684,27 +702,45 @@ out: } EXPORT_SYMBOL_GPL(__netpoll_setup); +/* + * Returns a pointer to a string representation of the identifier used + * to select the egress interface for the given netpoll instance. buf + * must be a buffer of length at least MAC_ADDR_STR_LEN + 1. 
+ */ +static char *egress_dev(struct netpoll *np, char *buf) +{ + if (np->dev_name[0]) + return np->dev_name; + + snprintf(buf, MAC_ADDR_STR_LEN, "%pM", np->dev_mac); + return buf; +} + int netpoll_setup(struct netpoll *np) { + struct net *net = current->nsproxy->net_ns; + char buf[MAC_ADDR_STR_LEN + 1]; struct net_device *ndev = NULL; bool ip_overwritten = false; struct in_device *in_dev; int err; rtnl_lock(); - if (np->dev_name[0]) { - struct net *net = current->nsproxy->net_ns; + if (np->dev_name[0]) ndev = __dev_get_by_name(net, np->dev_name); - } + else if (is_valid_ether_addr(np->dev_mac)) + ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac); + if (!ndev) { - np_err(np, "%s doesn't exist, aborting\n", np->dev_name); + np_err(np, "%s doesn't exist, aborting\n", egress_dev(np, buf)); err = -ENODEV; goto unlock; } netdev_hold(ndev, &np->dev_tracker, GFP_KERNEL); if (netdev_master_upper_dev_get(ndev)) { - np_err(np, "%s is a slave device, aborting\n", np->dev_name); + np_err(np, "%s is a slave device, aborting\n", + egress_dev(np, buf)); err = -EBUSY; goto put; } @@ -712,7 +748,8 @@ int netpoll_setup(struct netpoll *np) if (!netif_running(ndev)) { unsigned long atmost; - np_info(np, "device %s not up yet, forcing it\n", np->dev_name); + np_info(np, "device %s not up yet, forcing it\n", + egress_dev(np, buf)); err = dev_open(ndev, NULL); @@ -746,7 +783,7 @@ int netpoll_setup(struct netpoll *np) if (!ifa) { put_noaddr: np_err(np, "no IP address for %s, aborting\n", - np->dev_name); + egress_dev(np, buf)); err = -EDESTADDRREQ; goto put; } @@ -777,13 +814,13 @@ put_noaddr: } if (err) { np_err(np, "no IPv6 address for %s, aborting\n", - np->dev_name); + egress_dev(np, buf)); goto put; } else np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); #else np_err(np, "IPv6 is not supported %s, aborting\n", - np->dev_name); + egress_dev(np, buf)); err = -EINVAL; goto put; #endif diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f5e908c9e7ad..7745ad924ae2 
100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -11,8 +11,10 @@ #include <linux/slab.h> #include <linux/device.h> +#include <net/netdev_lock.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <net/xdp.h> #include <linux/dma-direction.h> @@ -25,6 +27,7 @@ #include <trace/events/page_pool.h> +#include "dev.h" #include "mp_dmabuf_devmem.h" #include "netmem_priv.h" #include "page_pool_priv.h" @@ -277,21 +280,23 @@ static int page_pool_init(struct page_pool *pool, get_device(pool->p.dev); if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) { - /* We rely on rtnl_lock()ing to make sure netdev_rx_queue - * configuration doesn't change while we're initializing - * the page_pool. - */ - ASSERT_RTNL(); + netdev_assert_locked(pool->slow.netdev); rxq = __netif_get_rx_queue(pool->slow.netdev, pool->slow.queue_idx); pool->mp_priv = rxq->mp_params.mp_priv; + pool->mp_ops = rxq->mp_params.mp_ops; } - if (pool->mp_priv) { + if (pool->mp_ops) { if (!pool->dma_map || !pool->dma_sync) return -EOPNOTSUPP; - err = mp_dmabuf_devmem_init(pool); + if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) { + err = -EFAULT; + goto free_ptr_ring; + } + + err = pool->mp_ops->init(pool); if (err) { pr_warn("%s() mem-provider init failed %d\n", __func__, err); @@ -587,8 +592,8 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) return netmem; /* Slow-path: cache empty, do real allocation */ - if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv) - netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + netmem = pool->mp_ops->alloc_netmems(pool, gfp); else netmem = __page_pool_alloc_pages_slow(pool, gfp); return netmem; @@ -679,8 +684,8 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) bool put; put = true; - if (static_branch_unlikely(&page_pool_mem_providers) && 
pool->mp_priv) - put = mp_dmabuf_devmem_release_page(pool, netmem); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + put = pool->mp_ops->release_netmem(pool, netmem); else __page_pool_release_page_dma(pool, netmem); @@ -1048,8 +1053,8 @@ static void __page_pool_destroy(struct page_pool *pool) page_pool_unlist(pool); page_pool_uninit(pool); - if (pool->mp_priv) { - mp_dmabuf_devmem_destroy(pool); + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); static_branch_dec(&page_pool_mem_providers); } @@ -1104,7 +1109,13 @@ static void page_pool_release_retry(struct work_struct *wq) int inflight; inflight = page_pool_release(pool); - if (!inflight) + /* In rare cases, a driver bug may cause inflight to go negative. + * Don't reschedule release if inflight is 0 or negative. + * - If 0, the page_pool has been destroyed + * - if negative, we will never recover + * in both cases no reschedule is necessary. + */ + if (inflight <= 0) return; /* Periodic warning for page pools the user can't see */ @@ -1140,11 +1151,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool) if (!pool->p.napi) return; - /* To avoid races with recycling and additional barriers make sure - * pool and NAPI are unlinked when NAPI is disabled. - */ - WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); - WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); + napi_assert_will_not_race(pool->p.napi); mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); @@ -1190,3 +1197,31 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid) } } EXPORT_SYMBOL(page_pool_update_nid); + +bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr) +{ + return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr); +} + +/* Associate a niov with a page pool. 
Should follow with a matching + * net_mp_niov_clear_page_pool() + */ +void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_set_pp_info(pool, netmem); + + pool->pages_state_hold_cnt++; + trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); +} + +/* Disassociate a niov from a page pool. Should only be used in the + * ->release_netmem() path. + */ +void net_mp_niov_clear_page_pool(struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_clear_pp_info(netmem); +} diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 6677e0c2e256..c82a95beceff 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -8,9 +8,9 @@ #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> #include <net/page_pool/types.h> +#include <net/page_pool/memory_provider.h> #include <net/sock.h> -#include "devmem.h" #include "page_pool_priv.h" #include "netdev-genl-gen.h" @@ -216,7 +216,6 @@ static int page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) { - struct net_devmem_dmabuf_binding *binding = pool->mp_priv; size_t inflight, refsz; unsigned int napi_id; void *hdr; @@ -234,7 +233,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, goto err_cancel; napi_id = pool->p.napi ? 
READ_ONCE(pool->p.napi->napi_id) : 0; - if (napi_id >= MIN_NAPI_ID && + if (napi_id_valid(napi_id) && nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id)) goto err_cancel; @@ -249,7 +248,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, pool->user.detach_time)) goto err_cancel; - if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) + if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL)) goto err_cancel; genlmsg_end(rsp, hdr); @@ -356,7 +355,7 @@ void page_pool_unlist(struct page_pool *pool) int page_pool_check_memory_provider(struct net_device *dev, struct netdev_rx_queue *rxq) { - struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv; + void *binding = rxq->mp_params.mp_priv; struct page_pool *pool; struct hlist_node *n; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 82b6a2c3c141..fe7fdefab994 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -158,9 +158,7 @@ #include <net/udp.h> #include <net/ip6_checksum.h> #include <net/addrconf.h> -#ifdef CONFIG_XFRM #include <net/xfrm.h> -#endif #include <net/netns/generic.h> #include <asm/byteorder.h> #include <linux/rcupdate.h> @@ -517,21 +515,23 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char data[128]; + size_t max; struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id); if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (count == 0) + if (count < 1) return -EINVAL; - if (count > sizeof(data)) - count = sizeof(data); - - if (copy_from_user(data, buf, count)) + max = min(count, sizeof(data) - 1); + if (copy_from_user(data, buf, max)) return -EFAULT; - data[count - 1] = 0; /* Strip trailing '\n' and terminate string */ + if (data[max - 1] == '\n') + data[max - 1] = 0; /* strip trailing '\n', terminate string */ + else + data[max] = 0; /* terminate string */ if (!strcmp(data, "stop")) pktgen_stop_all_threads(pn); @@ -744,31 +744,32 @@ static int 
pktgen_if_show(struct seq_file *seq, void *v) } -static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, - __u32 *num) +static ssize_t hex32_arg(const char __user *user_buffer, size_t maxlen, + __u32 *num) { - int i = 0; + size_t i = 0; + *num = 0; for (; i < maxlen; i++) { int value; char c; - *num <<= 4; if (get_user(c, &user_buffer[i])) return -EFAULT; value = hex_to_bin(c); - if (value >= 0) + if (value >= 0) { + *num <<= 4; *num |= value; - else + } else { break; + } } return i; } -static int count_trail_chars(const char __user * user_buffer, - unsigned int maxlen) +static ssize_t count_trail_chars(const char __user *user_buffer, size_t maxlen) { - int i; + size_t i; for (i = 0; i < maxlen; i++) { char c; @@ -790,10 +791,10 @@ done: return i; } -static long num_arg(const char __user *user_buffer, unsigned long maxlen, - unsigned long *num) +static ssize_t num_arg(const char __user *user_buffer, size_t maxlen, + unsigned long *num) { - int i; + size_t i; *num = 0; for (i = 0; i < maxlen; i++) { @@ -809,9 +810,9 @@ static long num_arg(const char __user *user_buffer, unsigned long maxlen, return i; } -static int strn_len(const char __user * user_buffer, unsigned int maxlen) +static ssize_t strn_len(const char __user *user_buffer, size_t maxlen) { - int i; + size_t i; for (i = 0; i < maxlen; i++) { char c; @@ -823,6 +824,7 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen) case '\r': case '\t': case ' ': + case '=': goto done_str; default: break; @@ -838,11 +840,11 @@ done_str: * "size1,weight_1 size2,weight_2 ... size_n,weight_n" for example. 
*/ static ssize_t get_imix_entries(const char __user *buffer, + size_t maxlen, struct pktgen_dev *pkt_dev) { - const int max_digits = 10; - int i = 0; - long len; + size_t i = 0, max; + ssize_t len; char c; pkt_dev->n_imix_entries = 0; @@ -854,21 +856,30 @@ static ssize_t get_imix_entries(const char __user *buffer, if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES) return -E2BIG; - len = num_arg(&buffer[i], max_digits, &size); + if (i >= maxlen) + return -EINVAL; + + max = min(10, maxlen - i); + len = num_arg(&buffer[i], max, &size); if (len < 0) return len; i += len; + if (i >= maxlen) + return -EINVAL; if (get_user(c, &buffer[i])) return -EFAULT; /* Check for comma between size_i and weight_i */ if (c != ',') return -EINVAL; i++; + if (i >= maxlen) + return -EINVAL; if (size < 14 + 20 + 8) size = 14 + 20 + 8; - len = num_arg(&buffer[i], max_digits, &weight); + max = min(10, maxlen - i); + len = num_arg(&buffer[i], max, &weight); if (len < 0) return len; if (weight <= 0) @@ -878,39 +889,55 @@ static ssize_t get_imix_entries(const char __user *buffer, pkt_dev->imix_entries[pkt_dev->n_imix_entries].weight = weight; i += len; + pkt_dev->n_imix_entries++; + + if (i >= maxlen) + break; if (get_user(c, &buffer[i])) return -EFAULT; - i++; - pkt_dev->n_imix_entries++; } while (c == ' '); return i; } -static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev) +static ssize_t get_labels(const char __user *buffer, + size_t maxlen, struct pktgen_dev *pkt_dev) { unsigned int n = 0; + size_t i = 0, max; + ssize_t len; char c; - ssize_t i = 0; - int len; pkt_dev->nr_labels = 0; do { __u32 tmp; - len = hex32_arg(&buffer[i], 8, &tmp); - if (len <= 0) + + if (n >= MAX_MPLS_LABELS) + return -E2BIG; + + if (i >= maxlen) + return -EINVAL; + + max = min(8, maxlen - i); + len = hex32_arg(&buffer[i], max, &tmp); + if (len < 0) return len; + + /* return empty list in case of invalid input or zero value */ + if (len == 0 || tmp == 0) + return maxlen; + 
pkt_dev->labels[n] = htonl(tmp); if (pkt_dev->labels[n] & MPLS_STACK_BOTTOM) pkt_dev->flags |= F_MPLS_RND; i += len; + n++; + if (i >= maxlen) + break; if (get_user(c, &buffer[i])) return -EFAULT; i++; - n++; - if (n >= MAX_MPLS_LABELS) - return -E2BIG; } while (c == ','); pkt_dev->nr_labels = n; @@ -952,11 +979,11 @@ static ssize_t pktgen_if_write(struct file *file, { struct seq_file *seq = file->private_data; struct pktgen_dev *pkt_dev = seq->private; - int i, max, len; + size_t i, max; + ssize_t len; char name[16], valstr[32]; unsigned long value = 0; char *pg_result = NULL; - int tmp = 0; char buf[128]; pg_result = &(pkt_dev->result[0]); @@ -967,16 +994,16 @@ static ssize_t pktgen_if_write(struct file *file, } max = count; - tmp = count_trail_chars(user_buffer, max); - if (tmp < 0) { + len = count_trail_chars(user_buffer, max); + if (len < 0) { pr_warn("illegal format\n"); - return tmp; + return len; } - i = tmp; + i = len; /* Read variable name */ - - len = strn_len(&user_buffer[i], sizeof(name) - 1); + max = min(sizeof(name) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1004,11 +1031,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "min_pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value < 14 + 20 + 8) value = 14 + 20 + 8; if (value != pkt_dev->min_pkt_size) { @@ -1021,11 +1048,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "max_pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value < 14 + 20 + 8) value = 14 + 20 + 8; if (value != pkt_dev->max_pkt_size) { @@ -1040,11 +1067,11 @@ static ssize_t pktgen_if_write(struct file *file, /* Shortcut for min = max */ if (!strcmp(name, "pkt_size")) { - len = 
num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value < 14 + 20 + 8) value = 14 + 20 + 8; if (value != pkt_dev->min_pkt_size) { @@ -1060,43 +1087,43 @@ static ssize_t pktgen_if_write(struct file *file, if (pkt_dev->clone_skb > 0) return -EINVAL; - len = get_imix_entries(&user_buffer[i], pkt_dev); + max = count - i; + len = get_imix_entries(&user_buffer[i], max, pkt_dev); if (len < 0) return len; fill_imix_distribution(pkt_dev); - i += len; return count; } if (!strcmp(name, "debug")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; debug = value; sprintf(pg_result, "OK: debug=%u", debug); return count; } if (!strcmp(name, "frags")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->nfrags = value; sprintf(pg_result, "OK: frags=%d", pkt_dev->nfrags); return count; } if (!strcmp(name, "delay")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value == 0x7FFFFFFF) pkt_dev->delay = ULLONG_MAX; else @@ -1107,13 +1134,13 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "rate")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (!value) - return len; + return -EINVAL; pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value; if (debug) pr_info("Delay set at: %llu ns\n", pkt_dev->delay); @@ -1122,13 +1149,13 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "ratep")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + 
len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (!value) - return len; + return -EINVAL; pkt_dev->delay = NSEC_PER_SEC/value; if (debug) pr_info("Delay set at: %llu ns\n", pkt_dev->delay); @@ -1137,11 +1164,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "udp_src_min")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value != pkt_dev->udp_src_min) { pkt_dev->udp_src_min = value; pkt_dev->cur_udp_src = value; @@ -1150,11 +1177,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "udp_dst_min")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value != pkt_dev->udp_dst_min) { pkt_dev->udp_dst_min = value; pkt_dev->cur_udp_dst = value; @@ -1163,11 +1190,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "udp_src_max")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value != pkt_dev->udp_src_max) { pkt_dev->udp_src_max = value; pkt_dev->cur_udp_src = value; @@ -1176,11 +1203,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "udp_dst_max")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value != pkt_dev->udp_dst_max) { pkt_dev->udp_dst_max = value; pkt_dev->cur_udp_dst = value; @@ -1189,7 +1216,8 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "clone_skb")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if 
(len < 0) return len; /* clone_skb is not supported for netif_receive xmit_mode and @@ -1198,34 +1226,33 @@ static ssize_t pktgen_if_write(struct file *file, if ((value > 0) && ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) || !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) - return -ENOTSUPP; + return -EOPNOTSUPP; if (value > 0 && (pkt_dev->n_imix_entries > 0 || !(pkt_dev->flags & F_SHARED))) return -EINVAL; - i += len; pkt_dev->clone_skb = value; sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb); return count; } if (!strcmp(name, "count")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->count = value; sprintf(pg_result, "OK: count=%llu", (unsigned long long)pkt_dev->count); return count; } if (!strcmp(name, "src_mac_count")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (pkt_dev->src_mac_count != value) { pkt_dev->src_mac_count = value; pkt_dev->cur_src_mac_offset = 0; @@ -1235,11 +1262,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "dst_mac_count")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (pkt_dev->dst_mac_count != value) { pkt_dev->dst_mac_count = value; pkt_dev->cur_dst_mac_offset = 0; @@ -1249,16 +1276,16 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "burst")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value > 1) && ((pkt_dev->xmit_mode == M_QUEUE_XMIT) || ((pkt_dev->xmit_mode == M_START_XMIT) && (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))))) - return -ENOTSUPP; + return -EOPNOTSUPP; if 
(value > 1 && !(pkt_dev->flags & F_SHARED)) return -EINVAL; @@ -1268,12 +1295,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "node")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; - if (node_possible(value)) { pkt_dev->node = value; sprintf(pg_result, "OK: node=%d", pkt_dev->node); @@ -1289,21 +1315,21 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "xmit_mode")) { char f[32]; - memset(f, 0, 32); - len = strn_len(&user_buffer[i], sizeof(f) - 1); + max = min(sizeof(f) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; + memset(f, 0, sizeof(f)); if (copy_from_user(f, &user_buffer[i], len)) return -EFAULT; - i += len; if (strcmp(f, "start_xmit") == 0) { pkt_dev->xmit_mode = M_START_XMIT; } else if (strcmp(f, "netif_receive") == 0) { /* clone_skb set earlier, not supported in this mode */ if (pkt_dev->clone_skb > 0) - return -ENOTSUPP; + return -EOPNOTSUPP; pkt_dev->xmit_mode = M_NETIF_RECEIVE; @@ -1329,14 +1355,14 @@ static ssize_t pktgen_if_write(struct file *file, char f[32]; char *end; - memset(f, 0, 32); - len = strn_len(&user_buffer[i], sizeof(f) - 1); + max = min(sizeof(f) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; + memset(f, 0, 32); if (copy_from_user(f, &user_buffer[i], len)) return -EFAULT; - i += len; flag = pktgen_read_flag(f, &disable); if (flag) { @@ -1378,7 +1404,8 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_min) - 1); + max = min(sizeof(pkt_dev->dst_min) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1393,12 +1420,13 @@ static ssize_t pktgen_if_write(struct file *file, } if (debug) pr_debug("dst_min set to: %s\n", pkt_dev->dst_min); - i 
+= len; + sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min); return count; } if (!strcmp(name, "dst_max")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_max) - 1); + max = min(sizeof(pkt_dev->dst_max) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1413,12 +1441,13 @@ static ssize_t pktgen_if_write(struct file *file, } if (debug) pr_debug("dst_max set to: %s\n", pkt_dev->dst_max); - i += len; + sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max); return count; } if (!strcmp(name, "dst6")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); + max = min(sizeof(buf) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1436,12 +1465,12 @@ static ssize_t pktgen_if_write(struct file *file, if (debug) pr_debug("dst6 set to: %s\n", buf); - i += len; sprintf(pg_result, "OK: dst6=%s", buf); return count; } if (!strcmp(name, "dst6_min")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); + max = min(sizeof(buf) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1458,12 +1487,12 @@ static ssize_t pktgen_if_write(struct file *file, if (debug) pr_debug("dst6_min set to: %s\n", buf); - i += len; sprintf(pg_result, "OK: dst6_min=%s", buf); return count; } if (!strcmp(name, "dst6_max")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); + max = min(sizeof(buf) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1479,12 +1508,12 @@ static ssize_t pktgen_if_write(struct file *file, if (debug) pr_debug("dst6_max set to: %s\n", buf); - i += len; sprintf(pg_result, "OK: dst6_max=%s", buf); return count; } if (!strcmp(name, "src6")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); + max = min(sizeof(buf) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1502,12 +1531,12 @@ static ssize_t pktgen_if_write(struct file *file, if (debug) pr_debug("src6 set to: %s\n", buf); - i += len; 
sprintf(pg_result, "OK: src6=%s", buf); return count; } if (!strcmp(name, "src_min")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_min) - 1); + max = min(sizeof(pkt_dev->src_min) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1522,12 +1551,13 @@ static ssize_t pktgen_if_write(struct file *file, } if (debug) pr_debug("src_min set to: %s\n", pkt_dev->src_min); - i += len; + sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min); return count; } if (!strcmp(name, "src_max")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_max) - 1); + max = min(sizeof(pkt_dev->src_max) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1542,12 +1572,13 @@ static ssize_t pktgen_if_write(struct file *file, } if (debug) pr_debug("src_max set to: %s\n", pkt_dev->src_max); - i += len; + sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max); return count; } if (!strcmp(name, "dst_mac")) { - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); + max = min(sizeof(valstr) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1564,7 +1595,8 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "src_mac")) { - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); + max = min(sizeof(valstr) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1588,11 +1620,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "flows")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value > MAX_CFLOWS) value = MAX_CFLOWS; @@ -1602,44 +1634,44 @@ static ssize_t pktgen_if_write(struct file *file, } #ifdef CONFIG_XFRM if (!strcmp(name, "spi")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i 
+= len; pkt_dev->spi = value; sprintf(pg_result, "OK: spi=%u", pkt_dev->spi); return count; } #endif if (!strcmp(name, "flowlen")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->lflow = value; sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow); return count; } if (!strcmp(name, "queue_map_min")) { - len = num_arg(&user_buffer[i], 5, &value); + max = min(5, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->queue_map_min = value; sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min); return count; } if (!strcmp(name, "queue_map_max")) { - len = num_arg(&user_buffer[i], 5, &value); + max = min(5, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->queue_map_max = value; sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max); return count; @@ -1648,10 +1680,11 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "mpls")) { unsigned int n, cnt; - len = get_labels(&user_buffer[i], pkt_dev); + max = count - i; + len = get_labels(&user_buffer[i], max, pkt_dev); if (len < 0) return len; - i += len; + cnt = sprintf(pg_result, "OK: mpls="); for (n = 0; n < pkt_dev->nr_labels; n++) cnt += sprintf(pg_result + cnt, @@ -1669,11 +1702,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "vlan_id")) { - len = num_arg(&user_buffer[i], 4, &value); + max = min(4, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value <= 4095) { pkt_dev->vlan_id = value; /* turn on VLAN */ @@ -1696,11 +1729,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "vlan_p")) { - len = num_arg(&user_buffer[i], 1, &value); + max = min(1, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value 
<= 7) && (pkt_dev->vlan_id != 0xffff)) { pkt_dev->vlan_p = value; sprintf(pg_result, "OK: vlan_p=%u", pkt_dev->vlan_p); @@ -1711,11 +1744,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "vlan_cfi")) { - len = num_arg(&user_buffer[i], 1, &value); + max = min(1, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value <= 1) && (pkt_dev->vlan_id != 0xffff)) { pkt_dev->vlan_cfi = value; sprintf(pg_result, "OK: vlan_cfi=%u", pkt_dev->vlan_cfi); @@ -1726,11 +1759,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "svlan_id")) { - len = num_arg(&user_buffer[i], 4, &value); + max = min(4, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value <= 4095) && ((pkt_dev->vlan_id != 0xffff))) { pkt_dev->svlan_id = value; /* turn on SVLAN */ @@ -1753,11 +1786,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "svlan_p")) { - len = num_arg(&user_buffer[i], 1, &value); + max = min(1, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value <= 7) && (pkt_dev->svlan_id != 0xffff)) { pkt_dev->svlan_p = value; sprintf(pg_result, "OK: svlan_p=%u", pkt_dev->svlan_p); @@ -1768,11 +1801,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "svlan_cfi")) { - len = num_arg(&user_buffer[i], 1, &value); + max = min(1, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value <= 1) && (pkt_dev->svlan_id != 0xffff)) { pkt_dev->svlan_cfi = value; sprintf(pg_result, "OK: svlan_cfi=%u", pkt_dev->svlan_cfi); @@ -1783,12 +1816,13 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "tos")) { - __u32 tmp_value = 0; - len = hex32_arg(&user_buffer[i], 2, &tmp_value); + __u32 tmp_value; + + max = min(2, count - i); + len = hex32_arg(&user_buffer[i], max, &tmp_value); if (len < 0) return 
len; - i += len; if (len == 2) { pkt_dev->tos = tmp_value; sprintf(pg_result, "OK: tos=0x%02x", pkt_dev->tos); @@ -1799,12 +1833,13 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "traffic_class")) { - __u32 tmp_value = 0; - len = hex32_arg(&user_buffer[i], 2, &tmp_value); + __u32 tmp_value; + + max = min(2, count - i); + len = hex32_arg(&user_buffer[i], max, &tmp_value); if (len < 0) return len; - i += len; if (len == 2) { pkt_dev->traffic_class = tmp_value; sprintf(pg_result, "OK: traffic_class=0x%02x", pkt_dev->traffic_class); @@ -1815,11 +1850,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "skb_priority")) { - len = num_arg(&user_buffer[i], 9, &value); + max = min(9, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->skb_priority = value; sprintf(pg_result, "OK: skb_priority=%i", pkt_dev->skb_priority); @@ -1879,7 +1914,8 @@ static ssize_t pktgen_thread_write(struct file *file, { struct seq_file *seq = file->private_data; struct pktgen_thread *t = seq->private; - int i, max, len, ret; + size_t i, max; + ssize_t len, ret; char name[40]; char *pg_result; @@ -1896,8 +1932,8 @@ static ssize_t pktgen_thread_write(struct file *file, i = len; /* Read variable name */ - - len = strn_len(&user_buffer[i], sizeof(name) - 1); + max = min(sizeof(name) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1927,14 +1963,15 @@ static ssize_t pktgen_thread_write(struct file *file, if (!strcmp(name, "add_device")) { char f[32]; memset(f, 0, 32); - len = strn_len(&user_buffer[i], sizeof(f) - 1); + max = min(sizeof(f) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) { ret = len; goto out; } if (copy_from_user(f, &user_buffer[i], len)) return -EFAULT; - i += len; + mutex_lock(&pktgen_thread_lock); ret = pktgen_add_device(t, f); mutex_unlock(&pktgen_thread_lock); @@ -2358,13 +2395,13 @@ static inline int f_pick(struct 
pktgen_dev *pkt_dev) } -#ifdef CONFIG_XFRM /* If there was already an IPSEC SA, we keep it as is, else * we go look for it ... */ #define DUMMY_MARK 0 static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { +#ifdef CONFIG_XFRM struct xfrm_state *x = pkt_dev->flows[flow].x; struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); if (!x) { @@ -2390,11 +2427,10 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) } } -} #endif +} static void set_cur_queue_map(struct pktgen_dev *pkt_dev) { - if (pkt_dev->flags & F_QUEUE_MAP_CPU) pkt_dev->cur_queue_map = smp_processor_id(); @@ -2569,10 +2605,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].flags |= F_INIT; pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; -#ifdef CONFIG_XFRM if (pkt_dev->flags & F_IPSEC) get_ipsec_sa(pkt_dev, flow); -#endif pkt_dev->nflows++; } } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d1e559fce918..5a24a30dfc2d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -53,6 +53,7 @@ #include <net/fib_rules.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include <net/netdev_lock.h> #include <net/devlink.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/addrconf.h> @@ -80,6 +81,11 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +int rtnl_lock_interruptible(void) +{ + return mutex_lock_interruptible(&rtnl_mutex); +} + int rtnl_lock_killable(void) { return mutex_lock_killable(&rtnl_mutex); @@ -1287,6 +1293,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + + nla_total_size(1) /* IFLA_NETNS_IMMUTABLE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + nla_total_size(4) /* IFLA_LINK_NETNSID */ + nla_total_size(4) /* IFLA_GROUP */ @@ -2041,6 +2048,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, netif_running(dev) ? 
READ_ONCE(dev->operstate) : IF_OPER_DOWN) || nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) || + nla_put_u8(skb, IFLA_NETNS_IMMUTABLE, dev->netns_immutable) || nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) || nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) || nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) || @@ -2229,6 +2237,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_ALLMULTI] = { .type = NLA_REJECT }, [IFLA_GSO_IPV4_MAX_SIZE] = NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1), [IFLA_GRO_IPV4_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_NETNS_IMMUTABLE] = { .type = NLA_REJECT }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2904,12 +2913,19 @@ static int do_set_master(struct net_device *dev, int ifindex, const struct net_device_ops *ops; int err; + /* Release the lower lock, the upper is responsible for locking + * the lower if needed. None of the existing upper devices + * use netdev instance lock, so don't grab it. 
+ */ + if (upper_dev) { if (upper_dev->ifindex == ifindex) return 0; ops = upper_dev->netdev_ops; if (ops->ndo_del_slave) { + netdev_unlock_ops(dev); err = ops->ndo_del_slave(upper_dev, dev); + netdev_lock_ops(dev); if (err) return err; } else { @@ -2923,7 +2939,9 @@ static int do_set_master(struct net_device *dev, int ifindex, return -EINVAL; ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { + netdev_unlock_ops(dev); err = ops->ndo_add_slave(upper_dev, dev, extack); + netdev_lock_ops(dev); if (err) return err; } else { @@ -2973,7 +2991,7 @@ static int do_set_proto_down(struct net_device *dev, if (pdreason[IFLA_PROTO_DOWN_REASON_MASK]) mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]); - dev_change_proto_down_reason(dev, mask, value); + netdev_change_proto_down_reason_locked(dev, mask, value); } if (nl_proto_down) { @@ -2984,8 +3002,7 @@ static int do_set_proto_down(struct net_device *dev, NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); return -EBUSY; } - err = dev_change_proto_down(dev, - proto_down); + err = netif_change_proto_down(dev, proto_down); if (err) return err; } @@ -3005,6 +3022,8 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, char ifname[IFNAMSIZ]; int err; + netdev_lock_ops(dev); + err = validate_linkmsg(dev, tb, extack); if (err < 0) goto errout; @@ -3020,7 +3039,8 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0); - err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex); + err = netif_change_net_namespace(dev, tgt_net, pat, + new_ifindex, extack); if (err) goto errout; @@ -3068,24 +3088,35 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, goto errout; } sa->sa_family = dev->type; + + netdev_unlock_ops(dev); + + /* dev_addr_sem is an outer lock, enforce proper ordering */ + down_write(&dev_addr_sem); + netdev_lock_ops(dev); + memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), 
dev->addr_len); - err = dev_set_mac_address_user(dev, sa, extack); + err = netif_set_mac_address(dev, sa, extack); kfree(sa); - if (err) + if (err) { + up_write(&dev_addr_sem); goto errout; + } status |= DO_SETLINK_MODIFIED; + + up_write(&dev_addr_sem); } if (tb[IFLA_MTU]) { - err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack); + err = netif_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_GROUP]) { - dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + netif_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); status |= DO_SETLINK_NOTIFY; } @@ -3095,15 +3126,15 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, * requested. */ if (ifm->ifi_index > 0 && ifname[0]) { - err = dev_change_name(dev, ifname); + err = netif_change_name(dev, ifname); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_IFALIAS]) { - err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), - nla_len(tb[IFLA_IFALIAS])); + err = netif_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), + nla_len(tb[IFLA_IFALIAS])); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; @@ -3115,8 +3146,8 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, } if (ifm->ifi_flags || ifm->ifi_change) { - err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), - extack); + err = netif_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), + extack); if (err < 0) goto errout; } @@ -3129,7 +3160,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, } if (tb[IFLA_CARRIER]) { - err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); + err = netif_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -3138,7 +3169,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, if (tb[IFLA_TXQLEN]) { unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]); - err = dev_change_tx_queue_len(dev, value); + 
err = netif_change_tx_queue_len(dev, value); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -3369,6 +3400,8 @@ errout: dev->name); } + netdev_unlock_ops(dev); + return err; } @@ -3762,7 +3795,13 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, struct netlink_ext_ack *extack) { unsigned char name_assign_type = NET_NAME_USER; - struct net *net = sock_net(skb->sk); + struct rtnl_newlink_params params = { + .src_net = sock_net(skb->sk), + .link_net = link_net, + .peer_net = peer_net, + .tb = tb, + .data = data, + }; u32 portid = NETLINK_CB(skb).portid; struct net_device *dev; char ifname[IFNAMSIZ]; @@ -3778,8 +3817,8 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, name_assign_type = NET_NAME_ENUM; } - dev = rtnl_create_link(link_net ? : tgt_net, ifname, - name_assign_type, ops, tb, extack); + dev = rtnl_create_link(tgt_net, ifname, name_assign_type, ops, tb, + extack); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -3787,13 +3826,8 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, dev->ifindex = ifm->ifi_index; - if (link_net) - net = link_net; - if (peer_net) - net = peer_net; - if (ops->newlink) - err = ops->newlink(net, dev, tb, data, extack); + err = ops->newlink(dev, ¶ms, extack); else err = register_netdevice(dev); if (err < 0) { @@ -3801,22 +3835,22 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, goto out; } + netdev_lock_ops(dev); + err = rtnl_configure_link(dev, ifm, portid, nlh); if (err < 0) goto out_unregister; - if (link_net) { - err = dev_change_net_namespace(dev, tgt_net, ifname); - if (err < 0) - goto out_unregister; - } if (tb[IFLA_MASTER]) { err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) goto out_unregister; } + + netdev_unlock_ops(dev); out: return err; out_unregister: + netdev_unlock_ops(dev); if (ops->newlink) { LIST_HEAD(list_kill); @@ -3862,20 +3896,26 @@ static int __rtnl_newlink(struct 
sk_buff *skb, struct nlmsghdr *nlh, { struct nlattr ** const tb = tbs->tb; struct net *net = sock_net(skb->sk); + struct net *device_net; struct net_device *dev; struct ifinfomsg *ifm; bool link_specified; + /* When creating, lookup for existing device in target net namespace */ + device_net = (nlh->nlmsg_flags & NLM_F_CREATE) && + (nlh->nlmsg_flags & NLM_F_EXCL) ? + tgt_net : net; + ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { link_specified = true; - dev = __dev_get_by_index(net, ifm->ifi_index); + dev = __dev_get_by_index(device_net, ifm->ifi_index); } else if (ifm->ifi_index < 0) { NL_SET_ERR_MSG(extack, "ifindex can't be negative"); return -EINVAL; } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) { link_specified = true; - dev = rtnl_dev_get(net, tb); + dev = rtnl_dev_get(device_net, tb); } else { link_specified = false; dev = NULL; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index b0ff6153be62..568779d5a0ef 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -71,7 +71,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, return siphash(&combined, offsetofend(typeof(combined), daddr), &ts_secret); } -EXPORT_SYMBOL(secure_tcpv6_ts_off); +EXPORT_IPV6_MOD(secure_tcpv6_ts_off); u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport) diff --git a/net/core/selftests.c b/net/core/selftests.c index 8f801e6e3b91..e99ae983fca9 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -299,7 +299,7 @@ static int net_test_phy_loopback_enable(struct net_device *ndev) if (!ndev->phydev) return -EOPNOTSUPP; - return phy_loopback(ndev->phydev, true); + return phy_loopback(ndev->phydev, true, 0); } static int net_test_phy_loopback_disable(struct net_device *ndev) @@ -307,7 +307,7 @@ static int net_test_phy_loopback_disable(struct net_device *ndev) if (!ndev->phydev) return -EOPNOTSUPP; - return phy_loopback(ndev->phydev, false); + return phy_loopback(ndev->phydev, false, 0); } static int 
net_test_phy_loopback_udp(struct net_device *ndev) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b1c81687e9d8..6cbf77bc61fc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -295,6 +295,68 @@ static struct sk_buff *napi_skb_cache_get(void) return skb; } +/** + * napi_skb_cache_get_bulk - obtain a number of zeroed skb heads from the cache + * @skbs: pointer to an at least @n-sized array to fill with skb pointers + * @n: number of entries to provide + * + * Tries to obtain @n &sk_buff entries from the NAPI percpu cache and writes + * the pointers into the provided array @skbs. If there are less entries + * available, tries to replenish the cache and bulk-allocates the diff from + * the MM layer if needed. + * The heads are being zeroed with either memset() or %__GFP_ZERO, so they are + * ready for {,__}build_skb_around() and don't have any data buffers attached. + * Must be called *only* from the BH context. + * + * Return: number of successfully allocated skbs (@n if no actual allocation + * needed or kmem_cache_alloc_bulk() didn't fail). + */ +u32 napi_skb_cache_get_bulk(void **skbs, u32 n) +{ + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + u32 bulk, total = n; + + local_lock_nested_bh(&napi_alloc_cache.bh_lock); + + if (nc->skb_count >= n) + goto get; + + /* No enough cached skbs. Try refilling the cache first */ + bulk = min(NAPI_SKB_CACHE_SIZE - nc->skb_count, NAPI_SKB_CACHE_BULK); + nc->skb_count += kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, + GFP_ATOMIC | __GFP_NOWARN, bulk, + &nc->skb_cache[nc->skb_count]); + if (likely(nc->skb_count >= n)) + goto get; + + /* Still not enough. 
Bulk-allocate the missing part directly, zeroed */ + n -= kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, + GFP_ATOMIC | __GFP_ZERO | __GFP_NOWARN, + n - nc->skb_count, &skbs[nc->skb_count]); + if (likely(nc->skb_count >= n)) + goto get; + + /* kmem_cache didn't allocate the number we need, limit the output */ + total -= n - nc->skb_count; + n = nc->skb_count; + +get: + for (u32 base = nc->skb_count - n, i = 0; i < n; i++) { + u32 cache_size = kmem_cache_size(net_hotdata.skbuff_cache); + + skbs[i] = nc->skb_cache[base + i]; + + kasan_mempool_unpoison_object(skbs[i], cache_size); + memset(skbs[i], 0, offsetof(struct sk_buff, tail)); + } + + nc->skb_count -= n; + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); + + return total; +} +EXPORT_SYMBOL_GPL(napi_skb_cache_get_bulk); + static inline void __finalize_skb_around(struct sk_buff *skb, void *data, unsigned int size) { @@ -5449,6 +5511,54 @@ err: } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); +static bool skb_tstamp_tx_report_so_timestamping(struct sk_buff *skb, + struct skb_shared_hwtstamps *hwtstamps, + int tstype) +{ + switch (tstype) { + case SCM_TSTAMP_SCHED: + return skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP; + case SCM_TSTAMP_SND: + return skb_shinfo(skb)->tx_flags & (hwtstamps ? 
SKBTX_HW_TSTAMP_NOBPF : + SKBTX_SW_TSTAMP); + case SCM_TSTAMP_ACK: + return TCP_SKB_CB(skb)->txstamp_ack & TSTAMP_ACK_SK; + case SCM_TSTAMP_COMPLETION: + return skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP; + } + + return false; +} + +static void skb_tstamp_tx_report_bpf_timestamping(struct sk_buff *skb, + struct skb_shared_hwtstamps *hwtstamps, + struct sock *sk, + int tstype) +{ + int op; + + switch (tstype) { + case SCM_TSTAMP_SCHED: + op = BPF_SOCK_OPS_TSTAMP_SCHED_CB; + break; + case SCM_TSTAMP_SND: + if (hwtstamps) { + op = BPF_SOCK_OPS_TSTAMP_SND_HW_CB; + *skb_hwtstamps(skb) = *hwtstamps; + } else { + op = BPF_SOCK_OPS_TSTAMP_SND_SW_CB; + } + break; + case SCM_TSTAMP_ACK: + op = BPF_SOCK_OPS_TSTAMP_ACK_CB; + break; + default: + return; + } + + bpf_skops_tx_timestamping(sk, skb, op); +} + void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, @@ -5461,6 +5571,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if (!sk) return; + if (skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF) + skb_tstamp_tx_report_bpf_timestamping(orig_skb, hwtstamps, + sk, tstype); + + if (!skb_tstamp_tx_report_so_timestamping(orig_skb, hwtstamps, tstype)) + return; + tsflags = READ_ONCE(sk->sk_tsflags); if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS) diff --git a/net/core/sock.c b/net/core/sock.c index 6c0e87f97fa4..323892066def 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -938,6 +938,7 @@ int sock_set_timestamping(struct sock *sk, int optname, WRITE_ONCE(sk->sk_tsflags, val); sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); + sock_valbool_flag(sk, SOCK_TIMESTAMPING_ANY, !!(val & TSFLAGS_ANY)); if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, @@ -948,6 +949,20 @@ int sock_set_timestamping(struct sock *sk, int optname, return 0; } +#if defined(CONFIG_CGROUP_BPF) +void bpf_skops_tx_timestamping(struct sock 
*sk, struct sk_buff *skb, int op) +{ + struct bpf_sock_ops_kern sock_ops; + + memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); + sock_ops.op = op; + sock_ops.is_fullsock = 1; + sock_ops.sk = sk; + bpf_skops_init_skb(&sock_ops, skb, 0); + __cgroup_bpf_run_filter_sock_ops(sk, &sock_ops, CGROUP_SOCK_OPS); +} +#endif + void sock_set_keepalive(struct sock *sk) { lock_sock(sk); @@ -2041,7 +2056,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = READ_ONCE(sk->sk_napi_id); /* aggregate non-NAPI IDs down to 0 */ - if (v.val < MIN_NAPI_ID) + if (!napi_id_valid(v.val)) v.val = 0; break; @@ -2550,8 +2565,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) u32 max_segs = 1; sk->sk_route_caps = dst->dev->features; - if (sk_is_tcp(sk)) + if (sk_is_tcp(sk)) { + struct inet_connection_sock *icsk = inet_csk(sk); + sk->sk_route_caps |= NETIF_F_GSO; + icsk->icsk_ack.dst_quick_ack = dst_metric(dst, RTAX_QUICKACK); + } if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; if (unlikely(sk->sk_gso_disabled)) @@ -2821,6 +2840,22 @@ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) } EXPORT_SYMBOL(sock_kmalloc); +/* + * Duplicate the input "src" memory block using the socket's + * option memory buffer. + */ +void *sock_kmemdup(struct sock *sk, const void *src, + int size, gfp_t priority) +{ + void *mem; + + mem = sock_kmalloc(sk, size, priority); + if (mem) + memcpy(mem, src, size); + return mem; +} +EXPORT_SYMBOL(sock_kmemdup); + /* Free an option memory block. Note, we actually want the inline * here as this allows gcc to detect the nullify and fold away the * condition entirely. 
@@ -3898,7 +3933,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); - mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk); + mem[SK_MEMINFO_FWD_ALLOC] = READ_ONCE(sk->sk_forward_alloc); mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); diff --git a/net/core/xdp.c b/net/core/xdp.c index 2c6ab6fb452f..f86eedad586a 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -618,16 +618,6 @@ void xdp_warn(const char *msg, const char *func, const int line) }; EXPORT_SYMBOL_GPL(xdp_warn); -int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp) -{ - n_skb = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, n_skb, skbs); - if (unlikely(!n_skb)) - return -ENOMEM; - - return 0; -} -EXPORT_SYMBOL_GPL(xdp_alloc_skb_bulk); - /** * xdp_build_skb_from_buff - create an skb from &xdp_buff * @xdp: &xdp_buff to convert to an skb diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index be515ba821e2..2045ddac0fe9 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -426,9 +426,6 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, newinet = inet_sk(newsk); ireq = inet_rsk(req); - sk_daddr_set(newsk, ireq->ir_rmt_addr); - sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); - newinet->inet_saddr = ireq->ir_loc_addr; RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; @@ -937,8 +934,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .addr2sockaddr = inet_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in), }; static int dccp_v4_init_sock(struct sock *sk) diff --git a/net/dccp/ipv6.c 
b/net/dccp/ipv6.c index d6649246188d..e24dbffabfc1 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -365,6 +365,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + ireq->ir_rmt_addr = LOOPBACK4_IPV6; + ireq->ir_loc_addr = LOOPBACK4_IPV6; + ireq->ireq_family = AF_INET6; ireq->ir_mark = inet_request_mark(sk, skb); @@ -504,10 +507,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; - newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; - newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... @@ -546,9 +546,6 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, dccp_sync_mss(newsk, dst_mtu(dst)); - newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; - newinet->inet_rcv_saddr = LOOPBACK4_IPV6; - if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); dccp_done(newsk); @@ -991,8 +988,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = inet6_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in6), }; /* @@ -1007,8 +1002,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = inet6_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in6), }; static void dccp_v6_sk_destruct(struct sock *sk) diff --git a/net/dccp/output.c b/net/dccp/output.c index 5c2e24f3c39b..39cf3430177a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -627,11 +627,10 @@ void dccp_send_delayed_ack(struct sock *sk) return; } - if (!time_before(timeout, 
icsk->icsk_ack.timeout)) - timeout = icsk->icsk_ack.timeout; + if (!time_before(timeout, icsk_delack_timeout(icsk))) + timeout = icsk_delack_timeout(icsk); } icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; - icsk->icsk_ack.timeout = timeout; sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } #endif diff --git a/net/dccp/timer.c b/net/dccp/timer.c index a4cfb47b60e5..232ac4ae0a73 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -139,9 +139,9 @@ static void dccp_write_timer(struct timer_list *t) if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending) goto out; - if (time_after(icsk->icsk_timeout, jiffies)) { + if (time_after(icsk_timeout(icsk), jiffies)) { sk_reset_timer(sk, &icsk->icsk_retransmit_timer, - icsk->icsk_timeout); + icsk_timeout(icsk)); goto out; } @@ -185,9 +185,9 @@ static void dccp_delack_timer(struct timer_list *t) if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; - if (time_after(icsk->icsk_ack.timeout, jiffies)) { + if (time_after(icsk_delack_timeout(icsk), jiffies)) { sk_reset_timer(sk, &icsk->icsk_delack_timer, - icsk->icsk_ack.timeout); + icsk_delack_timeout(icsk)); goto out; } diff --git a/net/dsa/conduit.c b/net/dsa/conduit.c index 3dfdb3cb47dc..4ae255cfb23f 100644 --- a/net/dsa/conduit.c +++ b/net/dsa/conduit.c @@ -10,6 +10,7 @@ #include <linux/netdevice.h> #include <linux/netlink.h> #include <net/dsa.h> +#include <net/netdev_lock.h> #include "conduit.h" #include "dsa.h" @@ -26,7 +27,9 @@ static int dsa_conduit_get_regs_len(struct net_device *dev) int len; if (ops->get_regs_len) { + netdev_lock_ops(dev); len = ops->get_regs_len(dev); + netdev_unlock_ops(dev); if (len < 0) return len; ret += len; @@ -57,11 +60,15 @@ static void dsa_conduit_get_regs(struct net_device *dev, int len; if (ops->get_regs_len && ops->get_regs) { + netdev_lock_ops(dev); len = ops->get_regs_len(dev); - if (len < 0) + if (len < 0) { + netdev_unlock_ops(dev); return; + } regs->len = len; ops->get_regs(dev, 
regs, data); + netdev_unlock_ops(dev); data += regs->len; } @@ -91,8 +98,10 @@ static void dsa_conduit_get_ethtool_stats(struct net_device *dev, int count = 0; if (ops->get_sset_count && ops->get_ethtool_stats) { + netdev_lock_ops(dev); count = ops->get_sset_count(dev, ETH_SS_STATS); ops->get_ethtool_stats(dev, stats, data); + netdev_unlock_ops(dev); } if (ds->ops->get_ethtool_stats) @@ -114,8 +123,10 @@ static void dsa_conduit_get_ethtool_phy_stats(struct net_device *dev, if (count >= 0) phy_ethtool_get_stats(dev->phydev, stats, data); } else if (ops->get_sset_count && ops->get_ethtool_phy_stats) { + netdev_lock_ops(dev); count = ops->get_sset_count(dev, ETH_SS_PHY_STATS); ops->get_ethtool_phy_stats(dev, stats, data); + netdev_unlock_ops(dev); } if (count < 0) @@ -132,11 +143,13 @@ static int dsa_conduit_get_sset_count(struct net_device *dev, int sset) struct dsa_switch *ds = cpu_dp->ds; int count = 0; + netdev_lock_ops(dev); if (sset == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats) count = phy_ethtool_get_sset_count(dev->phydev); else if (ops->get_sset_count) count = ops->get_sset_count(dev, sset); + netdev_unlock_ops(dev); if (count < 0) count = 0; @@ -163,6 +176,7 @@ static void dsa_conduit_get_strings(struct net_device *dev, uint32_t stringset, /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; + netdev_lock_ops(dev); if (stringset == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats) { mcount = phy_ethtool_get_sset_count(dev->phydev); @@ -176,6 +190,7 @@ static void dsa_conduit_get_strings(struct net_device *dev, uint32_t stringset, mcount = 0; ops->get_strings(dev, stringset, data); } + netdev_unlock_ops(dev); if (ds->ops->get_strings) { ndata = data + mcount * len; diff --git a/net/dsa/user.c b/net/dsa/user.c index 291ab1b4acc4..804dc7dac4f2 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -897,7 +897,7 @@ static void dsa_skb_tx_timestamp(struct dsa_user_priv *p, { struct 
dsa_switch *ds = p->dp->ds; - if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NOBPF)) return; if (!ds->ops->port_txtstamp) @@ -1243,16 +1243,25 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e) if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index)) return -EOPNOTSUPP; - /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev) - return -ENODEV; + /* If the port is using phylink managed EEE, then an unimplemented + * set_mac_eee() is permissible. + */ + if (!phylink_mac_implements_lpi(ds->phylink_mac_ops)) { + /* Port's PHY and MAC both need to be EEE capable */ + if (!dev->phydev) + return -ENODEV; - if (!ds->ops->set_mac_eee) - return -EOPNOTSUPP; + if (!ds->ops->set_mac_eee) + return -EOPNOTSUPP; - ret = ds->ops->set_mac_eee(ds, dp->index, e); - if (ret) - return ret; + ret = ds->ops->set_mac_eee(ds, dp->index, e); + if (ret) + return ret; + } else if (ds->ops->set_mac_eee) { + ret = ds->ops->set_mac_eee(ds, dp->index, e); + if (ret) + return ret; + } return phylink_ethtool_set_eee(dp->pl, e); } diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 84096f6b0236..0364b8fb577b 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -2,6 +2,7 @@ #include <linux/phy.h> #include <linux/ethtool_netlink.h> +#include <net/netdev_lock.h> #include "netlink.h" #include "common.h" @@ -72,23 +73,24 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); phydev = ethnl_req_get_phydev(&req_info, tb, ETHTOOL_A_CABLE_TEST_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ops = ethtool_phy_ops; if (!ops || !ops->start_cable_test) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = ops->start_cable_test(phydev, 
info->extack); @@ -97,7 +99,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) if (!ret) ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_NTF); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; @@ -339,23 +342,24 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) goto out_dev_put; rtnl_lock(); + netdev_lock_ops(dev); phydev = ethnl_req_get_phydev(&req_info, tb, ETHTOOL_A_CABLE_TEST_TDR_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ops = ethtool_phy_ops; if (!ops || !ops->start_cable_test_tdr) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = ops->start_cable_test_tdr(phydev, info->extack, &cfg); @@ -365,7 +369,8 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_TDR_NTF); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); out_dev_put: ethnl_parse_header_dev_put(&req_info); diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index 48aef6220f00..df5f344209c4 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -2,6 +2,7 @@ #include <linux/ethtool.h> #include <linux/firmware.h> +#include <net/netdev_lock.h> #include "common.h" #include "module_fw.h" @@ -418,8 +419,13 @@ cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb, static int cmis_fw_update_reset(struct net_device *dev) { __u32 reset_data = ETH_RESET_PHY; + int ret; - return dev->ethtool_ops->reset(dev, &reset_data); + netdev_lock_ops(dev); + ret = dev->ethtool_ops->reset(dev, &reset_data); + netdev_unlock_ops(dev); + + return ret; } void diff --git a/net/ethtool/common.c b/net/ethtool/common.c index b97374b508f6..0cb6da1f692a 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ 
-36,6 +36,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", + [NETIF_F_GSO_ACCECN_BIT] = "tx-tcp-accecn-segmentation", [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", @@ -214,6 +215,24 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(10, T1S, Half), __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_NAME(10, T1BRR, Full), + __DEFINE_LINK_MODE_NAME(200000, CR, Full), + __DEFINE_LINK_MODE_NAME(200000, KR, Full), + __DEFINE_LINK_MODE_NAME(200000, DR, Full), + __DEFINE_LINK_MODE_NAME(200000, DR_2, Full), + __DEFINE_LINK_MODE_NAME(200000, SR, Full), + __DEFINE_LINK_MODE_NAME(200000, VR, Full), + __DEFINE_LINK_MODE_NAME(400000, CR2, Full), + __DEFINE_LINK_MODE_NAME(400000, KR2, Full), + __DEFINE_LINK_MODE_NAME(400000, DR2, Full), + __DEFINE_LINK_MODE_NAME(400000, DR2_2, Full), + __DEFINE_LINK_MODE_NAME(400000, SR2, Full), + __DEFINE_LINK_MODE_NAME(400000, VR2, Full), + __DEFINE_LINK_MODE_NAME(800000, CR4, Full), + __DEFINE_LINK_MODE_NAME(800000, KR4, Full), + __DEFINE_LINK_MODE_NAME(800000, DR4, Full), + __DEFINE_LINK_MODE_NAME(800000, DR4_2, Full), + __DEFINE_LINK_MODE_NAME(800000, SR4, Full), + __DEFINE_LINK_MODE_NAME(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -222,8 +241,11 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_CR4 4 #define __LINK_MODE_LANES_CR8 8 #define __LINK_MODE_LANES_DR 1 +#define __LINK_MODE_LANES_DR_2 1 #define __LINK_MODE_LANES_DR2 2 +#define __LINK_MODE_LANES_DR2_2 2 #define __LINK_MODE_LANES_DR4 4 +#define __LINK_MODE_LANES_DR4_2 4 #define __LINK_MODE_LANES_DR8 8 #define __LINK_MODE_LANES_KR 1 #define 
__LINK_MODE_LANES_KR2 2 @@ -252,6 +274,9 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1L 1 #define __LINK_MODE_LANES_T1S 1 #define __LINK_MODE_LANES_T1S_P2MP 1 +#define __LINK_MODE_LANES_VR 1 +#define __LINK_MODE_LANES_VR2 2 +#define __LINK_MODE_LANES_VR4 4 #define __LINK_MODE_LANES_VR8 8 #define __LINK_MODE_LANES_DR8_2 8 #define __LINK_MODE_LANES_T1BRR 1 @@ -379,8 +404,27 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, CR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, KR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR_2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, SR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, VR, Full), + __DEFINE_LINK_MODE_PARAMS(400000, CR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, KR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR2_2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, SR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, VR2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, CR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR4_2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); +EXPORT_SYMBOL_GPL(link_mode_params); const char netif_msg_class_names[][ETH_GSTRING_LEN] = { [NETIF_MSG_DRV_BIT] = "drv", @@ -432,6 +476,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = { [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc", [const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp", [const_ilog2(SOF_TIMESTAMPING_OPT_RX_FILTER)] = "option-rx-filter", + 
[const_ilog2(SOF_TIMESTAMPING_TX_COMPLETION)] = "tx-completion", }; static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT); diff --git a/net/ethtool/common.h b/net/ethtool/common.h index a1088c2441d0..b4683d286a5a 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -15,12 +15,6 @@ #define __SOF_TIMESTAMPING_CNT (const_ilog2(SOF_TIMESTAMPING_LAST) + 1) #define __HWTSTAMP_FLAG_CNT (const_ilog2(HWTSTAMP_FLAG_LAST) + 1) -struct link_mode_info { - int speed; - u8 lanes; - u8 duplex; -}; - struct genl_info; struct hwtstamp_provider_desc; @@ -33,7 +27,6 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN]; extern const char phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN]; extern const char link_mode_names[][ETH_GSTRING_LEN]; -extern const struct link_mode_info link_mode_params[]; extern const char netif_msg_class_names[][ETH_GSTRING_LEN]; extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; diff --git a/net/ethtool/features.c b/net/ethtool/features.c index b6cb101d7f19..f2217983be2b 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <net/netdev_lock.h> + #include "netlink.h" #include "common.h" #include "bitset.h" @@ -234,9 +236,10 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ethnl_features_to_bitmap(old_active, dev->features); ethnl_features_to_bitmap(old_wanted, dev->wanted_features); ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, @@ -286,7 +289,8 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) out_ops: ethnl_ops_complete(dev); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; diff --git 
a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 1c3ba2247776..221639407c72 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -31,6 +31,7 @@ #include <net/ipv6.h> #include <net/xdp_sock_drv.h> #include <net/flow_offload.h> +#include <net/netdev_lock.h> #include <linux/ethtool_netlink.h> #include "common.h" @@ -992,11 +993,17 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, if (rc) return rc; - /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && - !ops->cap_rss_rxnfc_adds && - ethtool_get_flow_spec_ring(info.fs.ring_cookie)) - return -EINVAL; + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS) { + /* Nonzero ring with RSS only makes sense + * if NIC adds them together + */ + if (!ops->cap_rss_rxnfc_adds && + ethtool_get_flow_spec_ring(info.fs.ring_cookie)) + return -EINVAL; + + if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context)) + return -EINVAL; + } if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) { struct ethtool_rxfh_param rxfh = {}; @@ -1005,11 +1012,11 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, if (rc) return rc; - /* Sanity check: if symmetric-xor is set, then: + /* Sanity check: if symmetric-xor/symmetric-or-xor is set, then: * 1 - no other fields besides IP src/dst and/or L4 src/dst * 2 - If src is set, dst must also be set */ - if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && + if ((rxfh.input_xfrm & (RXH_XFRM_SYM_XOR | RXH_XFRM_SYM_OR_XOR)) && ((info.data & ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3)) || (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) || @@ -1382,11 +1389,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; /* Check input data transformation capabilities */ if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR && + rxfh.input_xfrm != RXH_XFRM_SYM_OR_XOR && rxfh.input_xfrm != RXH_XFRM_NO_CHANGE) return 
-EINVAL; if (rxfh.input_xfrm != RXH_XFRM_NO_CHANGE && - (rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && - !ops->cap_rss_sym_xor_supported) + rxfh.input_xfrm & ~ops->supported_input_xfrm) return -EOPNOTSUPP; create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; @@ -2311,6 +2318,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) */ busy = true; netdev_hold(dev, &dev_tracker, GFP_KERNEL); + netdev_unlock_ops(dev); rtnl_unlock(); if (rc == 0) { @@ -2325,8 +2333,10 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) do { rtnl_lock(); + netdev_lock_ops(dev); rc = ops->set_phys_id(dev, (i++ & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); + netdev_unlock_ops(dev); rtnl_unlock(); if (rc) break; @@ -2335,6 +2345,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) } rtnl_lock(); + netdev_lock_ops(dev); netdev_put(dev, &dev_tracker); busy = false; @@ -3134,6 +3145,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, return -EPERM; } + netdev_lock_ops(dev); if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); @@ -3367,6 +3379,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, out: if (dev->dev.parent) pm_runtime_put(dev->dev.parent); + netdev_unlock_ops(dev); return rc; } diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 6988e07bdcd6..4d4e0a82579a 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -4,6 +4,7 @@ #include <linux/firmware.h> #include <linux/sfp.h> #include <net/devlink.h> +#include <net/netdev_lock.h> #include "netlink.h" #include "common.h" @@ -419,19 +420,21 @@ int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = ethnl_module_fw_flash_validate(dev, info->extack); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = module_flash_fw(dev, tb, skb, info); 
ethnl_ops_complete(dev); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 734849a57369..a163d40c6431 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <net/netdev_lock.h> #include <net/netdev_queues.h> #include <net/sock.h> #include <linux/ethtool_netlink.h> @@ -90,6 +91,8 @@ int ethnl_ops_begin(struct net_device *dev) if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); + netdev_ops_assert_locked(dev); + if (!netif_device_present(dev) || dev->reg_state >= NETREG_UNREGISTERING) { ret = -ENODEV; @@ -490,7 +493,11 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ethnl_init_reply_data(reply_data, ops, req_info->dev); rtnl_lock(); + if (req_info->dev) + netdev_lock_ops(req_info->dev); ret = ops->prepare_data(req_info, reply_data, info); + if (req_info->dev) + netdev_unlock_ops(req_info->dev); rtnl_unlock(); if (ret < 0) goto err_cleanup; @@ -548,7 +555,9 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev); rtnl_lock(); + netdev_lock_ops(dev); ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, info); + netdev_unlock_ops(dev); rtnl_unlock(); if (ret < 0) goto out; @@ -693,6 +702,7 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); dev->cfg_pending = kmemdup(dev->cfg, sizeof(*dev->cfg), GFP_KERNEL_ACCOUNT); if (!dev->cfg_pending) { @@ -720,6 +730,7 @@ out_free_cfg: kfree(dev->cfg_pending); out_tie_cfg: dev->cfg_pending = dev->cfg; + netdev_unlock_ops(dev); rtnl_unlock(); out_dev: ethnl_parse_header_dev_put(&req_info); @@ -777,6 +788,8 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, req_info->dev = dev; req_info->flags |= 
ETHTOOL_FLAG_COMPACT_BITSETS; + netdev_ops_assert_locked(dev); + ethnl_init_reply_data(reply_data, ops, dev); ret = ops->prepare_data(req_info, reply_data, &info); if (ret < 0) diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index e067cc234419..1f590e8d75ed 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -9,6 +9,7 @@ #include <linux/phy.h> #include <linux/phy_link_topology.h> #include <linux/sfp.h> +#include <net/netdev_lock.h> struct phy_req_info { struct ethnl_req_info base; @@ -158,18 +159,19 @@ int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) return ret; rtnl_lock(); + netdev_lock_ops(req_info.base.dev); ret = ethnl_phy_parse_request(&req_info.base, tb, info->extack); if (ret < 0) - goto err_unlock_rtnl; + goto err_unlock; /* No PHY, return early */ if (!req_info.pdn) - goto err_unlock_rtnl; + goto err_unlock; ret = ethnl_phy_reply_size(&req_info.base, info->extack); if (ret < 0) - goto err_unlock_rtnl; + goto err_unlock; reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, req_info.base.dev, @@ -178,13 +180,14 @@ int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) info, &reply_payload); if (!rskb) { ret = -ENOMEM; - goto err_unlock_rtnl; + goto err_unlock; } ret = ethnl_phy_fill_reply(&req_info.base, rskb); if (ret) goto err_free_msg; + netdev_unlock_ops(req_info.base.dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info.base); genlmsg_end(rskb, reply_payload); @@ -193,7 +196,8 @@ int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) err_free_msg: nlmsg_free(rskb); -err_unlock_rtnl: +err_unlock: + netdev_unlock_ops(req_info.base.dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info.base); return ret; @@ -290,10 +294,15 @@ int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); if (ctx->phy_req_info->base.dev) { - ret = ethnl_phy_dump_one_dev(skb, ctx->phy_req_info->base.dev, cb); + dev = ctx->phy_req_info->base.dev; + netdev_lock_ops(dev); + ret = 
ethnl_phy_dump_one_dev(skb, dev, cb); + netdev_unlock_ops(dev); } else { for_each_netdev_dump(net, dev, ctx->ifindex) { + netdev_lock_ops(dev); ret = ethnl_phy_dump_one_dev(skb, dev, cb); + netdev_unlock_ops(dev); if (ret) break; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 58df9ad02ce8..6d9b1769896b 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <net/netdev_lock.h> + #include "netlink.h" #include "common.h" @@ -345,7 +347,9 @@ int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (ctx->match_ifindex && ctx->match_ifindex != ctx->ifindex) break; + netdev_lock_ops(dev); ret = rss_dump_one_dev(skb, cb, dev); + netdev_unlock_ops(dev); if (ret) break; } diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index ad3866c5a902..8130b406ef10 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -4,6 +4,7 @@ #include <linux/phy.h> #include <linux/phy_link_topology.h> #include <linux/ptp_clock_kernel.h> +#include <net/netdev_lock.h> #include "netlink.h" #include "common.h" @@ -449,12 +450,15 @@ int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); if (ctx->req_info->base.dev) { - ret = ethnl_tsinfo_dump_one_net_topo(skb, - ctx->req_info->base.dev, - cb); + dev = ctx->req_info->base.dev; + netdev_lock_ops(dev); + ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); + netdev_unlock_ops(dev); } else { for_each_netdev_dump(net, dev, ctx->pos_ifindex) { + netdev_lock_ops(dev); ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); + netdev_unlock_ops(dev); if (ret < 0 && ret != -EOPNOTSUPP) break; ctx->pos_phyindex = 0; diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig index 1b048c17b6c8..fcacdf4f0ffc 100644 --- a/net/hsr/Kconfig +++ b/net/hsr/Kconfig @@ -38,3 +38,21 @@ config HSR relying on this code in a safety critical system! If unsure, say N. 
+ +if HSR + +config PRP_DUP_DISCARD_KUNIT_TEST + tristate "PRP duplicate discard KUnit tests" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Covers the PRP duplicate discard algorithm. + Only useful for kernel devs running KUnit test harness and are not + for inclusion into a production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + +endif diff --git a/net/hsr/Makefile b/net/hsr/Makefile index 75df90d3b416..34e581db5c41 100644 --- a/net/hsr/Makefile +++ b/net/hsr/Makefile @@ -8,3 +8,5 @@ obj-$(CONFIG_HSR) += hsr.o hsr-y := hsr_main.o hsr_framereg.o hsr_device.o \ hsr_netlink.o hsr_slave.o hsr_forward.o hsr-$(CONFIG_DEBUG_FS) += hsr_debugfs.o + +obj-$(CONFIG_PRP_DUP_DISCARD_KUNIT_TEST) += prp_dup_discard_test.o diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index b6fb18469439..439cfb7ad5d1 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -616,6 +616,7 @@ static struct hsr_proto_ops hsr_ops = { .drop_frame = hsr_drop_frame, .fill_frame_info = hsr_fill_frame_info, .invalid_dan_ingress_frame = hsr_invalid_dan_ingress_frame, + .register_frame_out = hsr_register_frame_out, }; static struct hsr_proto_ops prp_ops = { @@ -626,6 +627,7 @@ static struct hsr_proto_ops prp_ops = { .fill_frame_info = prp_fill_frame_info, .handle_san_frame = prp_handle_san_frame, .update_san_info = prp_update_san_info, + .register_frame_out = prp_register_frame_out, }; void hsr_dev_setup(struct net_device *dev) @@ -643,7 +645,7 @@ void hsr_dev_setup(struct net_device *dev) /* Not sure about this. Taken from bridge code. netdevice.h says * it means "Does not change network namespaces". 
*/ - dev->netns_local = true; + dev->netns_immutable = true; dev->needs_free_netdev = true; diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index a4bacf198555..c67c0d35921d 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -536,8 +536,8 @@ static void hsr_forward_do(struct hsr_frame_info *frame) * Also for SAN, this shouldn't be done. */ if (!frame->is_from_san && - hsr_register_frame_out(port, frame->node_src, - frame->sequence_nr)) + hsr->proto_ops->register_frame_out && + hsr->proto_ops->register_frame_out(port, frame)) continue; if (frame->is_supervision && port->type == HSR_PT_MASTER && diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 73bc6f659812..4ce471a2f387 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -35,6 +35,7 @@ static bool seq_nr_after(u16 a, u16 b) #define seq_nr_before(a, b) seq_nr_after((b), (a)) #define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) +#define PRP_DROP_WINDOW_LEN 32768 bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr) { @@ -176,8 +177,11 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, new_node->time_in[i] = now; new_node->time_out[i] = now; } - for (i = 0; i < HSR_PT_PORTS; i++) + for (i = 0; i < HSR_PT_PORTS; i++) { new_node->seq_out[i] = seq_out; + new_node->seq_expected[i] = seq_out + 1; + new_node->seq_start[i] = seq_out + 1; + } if (san && hsr->proto_ops->handle_san_frame) hsr->proto_ops->handle_san_frame(san, rx_port, new_node); @@ -482,9 +486,11 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, * 0 otherwise, or * negative error code on error */ -int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, - u16 sequence_nr) +int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) { + struct hsr_node *node = frame->node_src; + u16 sequence_nr = frame->sequence_nr; + spin_lock_bh(&node->seq_out_lock); if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) 
&& time_is_after_jiffies(node->time_out[port->type] + @@ -499,6 +505,93 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, return 0; } +/* Adaptation of the PRP duplicate discard algorithm described in wireshark + * wiki (https://wiki.wireshark.org/PRP) + * + * A drop window is maintained for both LANs with start sequence set to the + * first sequence accepted on the LAN that has not been seen on the other LAN, + * and expected sequence set to the latest received sequence number plus one. + * + * When a frame is received on either LAN it is compared against the received + * frames on the other LAN. If it is outside the drop window of the other LAN + * the frame is accepted and the drop window is updated. + * The drop window for the other LAN is reset. + * + * 'port' is the outgoing interface + * 'frame' is the frame to be sent + * + * Return: + * 1 if frame can be shown to have been sent recently on this interface, + * 0 otherwise + */ +int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) +{ + enum hsr_port_type other_port; + enum hsr_port_type rcv_port; + struct hsr_node *node; + u16 sequence_diff; + u16 sequence_exp; + u16 sequence_nr; + + /* out-going frames are always in order + * and can be checked the same way as for HSR + */ + if (frame->port_rcv->type == HSR_PT_MASTER) + return hsr_register_frame_out(port, frame); + + /* for PRP we should only forward frames from the slave ports + * to the master port + */ + if (port->type != HSR_PT_MASTER) + return 1; + + node = frame->node_src; + sequence_nr = frame->sequence_nr; + sequence_exp = sequence_nr + 1; + rcv_port = frame->port_rcv->type; + other_port = rcv_port == HSR_PT_SLAVE_A ? 
HSR_PT_SLAVE_B : + HSR_PT_SLAVE_A; + + spin_lock_bh(&node->seq_out_lock); + if (time_is_before_jiffies(node->time_out[port->type] + + msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)) || + (node->seq_start[rcv_port] == node->seq_expected[rcv_port] && + node->seq_start[other_port] == node->seq_expected[other_port])) { + /* the node hasn't been sending for a while + * or both drop windows are empty, forward the frame + */ + node->seq_start[rcv_port] = sequence_nr; + } else if (seq_nr_before(sequence_nr, node->seq_expected[other_port]) && + seq_nr_before_or_eq(node->seq_start[other_port], sequence_nr)) { + /* drop the frame, update the drop window for the other port + * and reset our drop window + */ + node->seq_start[other_port] = sequence_exp; + node->seq_expected[rcv_port] = sequence_exp; + node->seq_start[rcv_port] = node->seq_expected[rcv_port]; + spin_unlock_bh(&node->seq_out_lock); + return 1; + } + + /* update the drop window for the port where this frame was received + * and clear the drop window for the other port + */ + node->seq_start[other_port] = node->seq_expected[other_port]; + node->seq_expected[rcv_port] = sequence_exp; + sequence_diff = sequence_exp - node->seq_start[rcv_port]; + if (sequence_diff > PRP_DROP_WINDOW_LEN) + node->seq_start[rcv_port] = sequence_exp - PRP_DROP_WINDOW_LEN; + + node->time_out[port->type] = jiffies; + node->seq_out[port->type] = sequence_nr; + spin_unlock_bh(&node->seq_out_lock); + return 0; +} + +#if IS_MODULE(CONFIG_PRP_DUP_DISCARD_KUNIT_TEST) +EXPORT_SYMBOL(prp_register_frame_out); +#endif + static struct hsr_port *get_late_port(struct hsr_priv *hsr, struct hsr_node *node) { diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 993fa950d814..b04948659d84 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -44,8 +44,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, u16 sequence_nr); -int 
hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, - u16 sequence_nr); +int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame); void hsr_prune_nodes(struct timer_list *t); void hsr_prune_proxy_nodes(struct timer_list *t); @@ -73,6 +72,8 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup); bool hsr_is_node_in_db(struct list_head *node_db, const unsigned char addr[ETH_ALEN]); +int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame); + struct hsr_node { struct list_head mac_list; /* Protect R/W access to seq_out */ @@ -89,6 +90,9 @@ struct hsr_node { bool san_b; u16 seq_out[HSR_PT_PORTS]; bool removed; + /* PRP specific duplicate handling */ + u16 seq_expected[HSR_PT_PORTS]; + u16 seq_start[HSR_PT_PORTS]; struct rcu_head rcu_head; }; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 7561845b8bf6..1bc47b17a296 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -175,6 +175,8 @@ struct hsr_proto_ops { struct hsr_frame_info *frame); bool (*invalid_dan_ingress_frame)(__be16 protocol); void (*update_san_info)(struct hsr_node *node, bool is_sup); + int (*register_frame_out)(struct hsr_port *port, + struct hsr_frame_info *frame); }; struct hsr_self_node { diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index b68f2f71d0e1..b120470246cc 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -29,10 +29,12 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { /* Here, it seems a netdevice has already been allocated for us, and the * hsr_dev_setup routine has been executed. Nice! 
*/ -static int hsr_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int hsr_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct net *link_net = rtnl_newlink_link_net(params); + struct nlattr **data = params->data; enum hsr_version proto_version; unsigned char multicast_spec; u8 proto = HSR_PROTOCOL_HSR; @@ -46,7 +48,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, NL_SET_ERR_MSG_MOD(extack, "Slave1 device not specified"); return -EINVAL; } - link[0] = __dev_get_by_index(src_net, + link[0] = __dev_get_by_index(link_net, nla_get_u32(data[IFLA_HSR_SLAVE1])); if (!link[0]) { NL_SET_ERR_MSG_MOD(extack, "Slave1 does not exist"); @@ -56,7 +58,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, NL_SET_ERR_MSG_MOD(extack, "Slave2 device not specified"); return -EINVAL; } - link[1] = __dev_get_by_index(src_net, + link[1] = __dev_get_by_index(link_net, nla_get_u32(data[IFLA_HSR_SLAVE2])); if (!link[1]) { NL_SET_ERR_MSG_MOD(extack, "Slave2 does not exist"); @@ -69,7 +71,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, } if (data[IFLA_HSR_INTERLINK]) - interlink = __dev_get_by_index(src_net, + interlink = __dev_get_by_index(link_net, nla_get_u32(data[IFLA_HSR_INTERLINK])); if (interlink && interlink == link[0]) { diff --git a/net/hsr/prp_dup_discard_test.c b/net/hsr/prp_dup_discard_test.c new file mode 100644 index 000000000000..e86b7b633ae8 --- /dev/null +++ b/net/hsr/prp_dup_discard_test.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <kunit/test.h> + +#include "hsr_main.h" +#include "hsr_framereg.h" + +struct prp_test_data { + struct hsr_port port; + struct hsr_port port_rcv; + struct hsr_frame_info frame; + struct hsr_node node; +}; + +static struct prp_test_data *build_prp_test_data(struct kunit *test) +{ + struct prp_test_data *data = kunit_kzalloc(test, + 
sizeof(struct prp_test_data), GFP_USER); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, data); + + data->frame.node_src = &data->node; + data->frame.port_rcv = &data->port_rcv; + data->port_rcv.type = HSR_PT_SLAVE_A; + data->node.seq_start[HSR_PT_SLAVE_A] = 1; + data->node.seq_expected[HSR_PT_SLAVE_A] = 1; + data->node.seq_start[HSR_PT_SLAVE_B] = 1; + data->node.seq_expected[HSR_PT_SLAVE_B] = 1; + data->node.seq_out[HSR_PT_MASTER] = 0; + data->node.time_out[HSR_PT_MASTER] = jiffies; + data->port.type = HSR_PT_MASTER; + + return data; +} + +static void check_prp_counters(struct kunit *test, + struct prp_test_data *data, + u16 seq_start_a, u16 seq_expected_a, + u16 seq_start_b, u16 seq_expected_b) +{ + KUNIT_EXPECT_EQ(test, data->node.seq_start[HSR_PT_SLAVE_A], + seq_start_a); + KUNIT_EXPECT_EQ(test, data->node.seq_start[HSR_PT_SLAVE_B], + seq_start_b); + KUNIT_EXPECT_EQ(test, data->node.seq_expected[HSR_PT_SLAVE_A], + seq_expected_a); + KUNIT_EXPECT_EQ(test, data->node.seq_expected[HSR_PT_SLAVE_B], + seq_expected_b); +} + +static void prp_dup_discard_forward(struct kunit *test) +{ + /* Normal situation, both LANs in sync. Next frame is forwarded */ + struct prp_test_data *data = build_prp_test_data(test); + + data->frame.sequence_nr = 2; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + KUNIT_EXPECT_EQ(test, jiffies, data->node.time_out[HSR_PT_MASTER]); + check_prp_counters(test, data, data->frame.sequence_nr, + data->frame.sequence_nr + 1, 1, 1); +} + +static void prp_dup_discard_inside_dropwindow(struct kunit *test) +{ + /* Normal situation, other LAN ahead by one. 
Frame is dropped */ + struct prp_test_data *data = build_prp_test_data(test); + unsigned long time = jiffies - 10; + + data->frame.sequence_nr = 1; + data->node.seq_expected[HSR_PT_SLAVE_B] = 3; + data->node.seq_out[HSR_PT_MASTER] = 2; + data->node.time_out[HSR_PT_MASTER] = time; + + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, 2, data->node.seq_out[HSR_PT_MASTER]); + KUNIT_EXPECT_EQ(test, time, data->node.time_out[HSR_PT_MASTER]); + check_prp_counters(test, data, 2, 2, 2, 3); +} + +static void prp_dup_discard_node_timeout(struct kunit *test) +{ + /* Timeout situation, node hasn't sent anything for a while */ + struct prp_test_data *data = build_prp_test_data(test); + + data->frame.sequence_nr = 7; + data->node.seq_start[HSR_PT_SLAVE_A] = 1234; + data->node.seq_expected[HSR_PT_SLAVE_A] = 1235; + data->node.seq_start[HSR_PT_SLAVE_B] = 1234; + data->node.seq_expected[HSR_PT_SLAVE_B] = 1234; + data->node.seq_out[HSR_PT_MASTER] = 1234; + data->node.time_out[HSR_PT_MASTER] = + jiffies - msecs_to_jiffies(HSR_ENTRY_FORGET_TIME) - 1; + + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + KUNIT_EXPECT_EQ(test, jiffies, data->node.time_out[HSR_PT_MASTER]); + check_prp_counters(test, data, data->frame.sequence_nr, + data->frame.sequence_nr + 1, 1234, 1234); +} + +static void prp_dup_discard_out_of_sequence(struct kunit *test) +{ + /* One frame is received out of sequence on both LANs */ + struct prp_test_data *data = build_prp_test_data(test); + + data->node.seq_start[HSR_PT_SLAVE_A] = 10; + data->node.seq_expected[HSR_PT_SLAVE_A] = 10; + data->node.seq_start[HSR_PT_SLAVE_B] = 10; + data->node.seq_expected[HSR_PT_SLAVE_B] = 10; + data->node.seq_out[HSR_PT_MASTER] = 9; + + /* 1st old frame, should be accepted */ + data->frame.sequence_nr = 8; + KUNIT_EXPECT_EQ(test, 0, + 
prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_counters(test, data, data->frame.sequence_nr, + data->frame.sequence_nr + 1, 10, 10); + + /* 2nd frame should be dropped */ + data->frame.sequence_nr = 8; + data->port_rcv.type = HSR_PT_SLAVE_B; + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_counters(test, data, data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1); + + /* Next frame, this is forwarded */ + data->frame.sequence_nr = 10; + data->port_rcv.type = HSR_PT_SLAVE_A; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_counters(test, data, data->frame.sequence_nr, + data->frame.sequence_nr + 1, 9, 9); + + /* and next one is dropped */ + data->frame.sequence_nr = 10; + data->port_rcv.type = HSR_PT_SLAVE_B; + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_counters(test, data, data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1, + data->frame.sequence_nr + 1); +} + +static void prp_dup_discard_lan_b_late(struct kunit *test) +{ + /* LAN B is behind */ + struct prp_test_data *data = build_prp_test_data(test); + + data->node.seq_start[HSR_PT_SLAVE_A] = 9; + data->node.seq_expected[HSR_PT_SLAVE_A] = 9; + data->node.seq_start[HSR_PT_SLAVE_B] = 9; + data->node.seq_expected[HSR_PT_SLAVE_B] = 9; + data->node.seq_out[HSR_PT_MASTER] = 8; + + data->frame.sequence_nr = 9; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_counters(test, data, 9, 10, 9, 9); + + data->frame.sequence_nr = 10; + KUNIT_EXPECT_EQ(test, 0, + 
prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_counters(test, data, 9, 11, 9, 9); + + data->frame.sequence_nr = 9; + data->port_rcv.type = HSR_PT_SLAVE_B; + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_counters(test, data, 10, 11, 10, 10); + + data->frame.sequence_nr = 10; + data->port_rcv.type = HSR_PT_SLAVE_B; + KUNIT_EXPECT_EQ(test, 1, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_counters(test, data, 11, 11, 11, 11); +} + +static struct kunit_case prp_dup_discard_test_cases[] = { + KUNIT_CASE(prp_dup_discard_forward), + KUNIT_CASE(prp_dup_discard_inside_dropwindow), + KUNIT_CASE(prp_dup_discard_node_timeout), + KUNIT_CASE(prp_dup_discard_out_of_sequence), + KUNIT_CASE(prp_dup_discard_lan_b_late), + {} +}; + +static struct kunit_suite prp_dup_discard_suite = { + .name = "prp_duplicate_discard", + .test_cases = prp_dup_discard_test_cases, +}; + +kunit_test_suite(prp_dup_discard_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for PRP duplicate discard"); +MODULE_AUTHOR("Jaakko Karrenpalo <jkarrenpalo@gmail.com>"); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 175efd860f7b..018929563c6b 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -50,6 +50,7 @@ #include <linux/if_arp.h> #include <net/ipv6.h> +#include <net/netdev_lock.h> #include "6lowpan_i.h" @@ -116,7 +117,7 @@ static void lowpan_setup(struct net_device *ldev) ldev->netdev_ops = &lowpan_netdev_ops; ldev->header_ops = &lowpan_header_ops; ldev->needs_free_netdev = true; - ldev->netns_local = true; + ldev->netns_immutable = true; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -129,10 +130,11 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[], return 0; } -static int lowpan_newlink(struct net *src_net, struct 
net_device *ldev, - struct nlattr *tb[], struct nlattr *data[], +static int lowpan_newlink(struct net_device *ldev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct nlattr **tb = params->tb; struct net_device *wdev; int ret; @@ -142,6 +144,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, if (!tb[IFLA_LINK]) return -EINVAL; + if (params->link_net && !net_eq(params->link_net, dev_net(ldev))) + return -EINVAL; /* find and hold wpan device */ wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK])); if (!wdev) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 867d637d86f0..d4b983d17038 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -31,7 +31,8 @@ static const char lowpan_frags_cache_name[] = "lowpan-frags"; static struct inet_frags lowpan_frags; static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev, struct net_device *ldev); + struct sk_buff *prev, struct net_device *ldev, + int *refs); static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) { @@ -45,6 +46,7 @@ static void lowpan_frag_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; + int refs = 1; fq = container_of(frag, struct frag_queue, q); @@ -53,10 +55,10 @@ static void lowpan_frag_expire(struct timer_list *t) if (fq->q.flags & INET_FRAG_COMPLETE) goto out; - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, &refs); out: spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q); + inet_frag_putn(&fq->q, refs); } static inline struct lowpan_frag_queue * @@ -82,7 +84,8 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, } static int lowpan_frag_queue(struct lowpan_frag_queue *fq, - struct sk_buff *skb, u8 frag_type) + struct sk_buff *skb, u8 frag_type, + int *refs) { struct sk_buff *prev_tail; struct net_device *ldev; @@ -143,7 
+146,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = lowpan_frag_reasm(fq, skb, prev_tail, ldev); + res = lowpan_frag_reasm(fq, skb, prev_tail, ldev, refs); skb->_skb_refdst = orefdst; return res; } @@ -162,11 +165,12 @@ err: * the last and the first frames arrived and all the bits are here. */ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *ldev) + struct sk_buff *prev_tail, struct net_device *ldev, + int *refs) { void *reasm_data; - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); if (!reasm_data) @@ -300,17 +304,20 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) goto err; } + rcu_read_lock(); fq = fq_find(net, cb, &hdr.source, &hdr.dest); if (fq != NULL) { - int ret; + int ret, refs = 0; spin_lock(&fq->q.lock); - ret = lowpan_frag_queue(fq, skb, frag_type); + ret = lowpan_frag_queue(fq, skb, frag_type, &refs); spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q); + rcu_read_unlock(); + inet_frag_putn(&fq->q, refs); return ret; } + rcu_read_unlock(); err: kfree_skb(skb); diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index 88adb04e4072..89b671b12600 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -226,11 +226,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) { if (!wpan_dev->netdev) continue; - wpan_dev->netdev->netns_local = false; + wpan_dev->netdev->netns_immutable = false; err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d"); if (err) break; - wpan_dev->netdev->netns_local = true; + wpan_dev->netdev->netns_immutable = true; } if (err) { @@ -242,11 +242,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, list) { if (!wpan_dev->netdev) continue; - 
wpan_dev->netdev->netns_local = false; + wpan_dev->netdev->netns_immutable = false; err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d"); WARN_ON(err); - wpan_dev->netdev->netns_local = true; + wpan_dev->netdev->netns_immutable = true; } return err; @@ -291,7 +291,7 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb, switch (state) { /* TODO NETDEV_DEVTYPE */ case NETDEV_REGISTER: - dev->netns_local = true; + dev->netns_immutable = true; wpan_dev->identifier = ++rdev->wpan_dev_id; list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list); rdev->devlist_generation++; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 21f46ee7b6e9..5df1f1325259 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc)); WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); WARN_ON_ONCE(sk->sk_wmem_queued); - WARN_ON_ONCE(sk_forward_alloc_get(sk)); + WARN_ON_ONCE(sk->sk_forward_alloc); kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 814300eee39d..a648fff71ea7 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1064,8 +1064,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } - if (__in_dev_get_rtnl(dev)) { - IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); + if (__in_dev_get_rtnl_net(dev)) { + IN_DEV_CONF_SET(__in_dev_get_rtnl_net(dev), PROXY_ARP, on); return 0; } return -ENXIO; @@ -1295,14 +1295,14 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch (cmd) { case SIOCDARP: - rtnl_lock(); + rtnl_net_lock(net); err = arp_req_delete(net, &r); - rtnl_unlock(); + rtnl_net_unlock(net); break; case SIOCSARP: - rtnl_lock(); + rtnl_net_lock(net); err = arp_req_set(net, &r); - rtnl_unlock(); + rtnl_net_unlock(net); break; case SIOCGARP: 
rcu_read_lock(); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 554804774628..e01492234b0b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -121,7 +121,7 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt) { /* bpf_tcp_ca prog cannot have NULL tp */ - __tcp_send_ack((struct sock *)tp, rcv_nxt); + __tcp_send_ack((struct sock *)tp, rcv_nxt, 0); return 0; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 55b8151759bc..754f60fb6e25 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -46,6 +46,7 @@ #include <linux/notifier.h> #include <linux/inetdevice.h> #include <linux/igmp.h> +#include "igmp_internal.h" #include <linux/slab.h> #include <linux/hash.h> #ifdef CONFIG_SYSCTL @@ -107,15 +108,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_PROTO] = { .type = NLA_U8 }, }; -struct inet_fill_args { - u32 portid; - u32 seq; - int event; - unsigned int flags; - int netnsid; - int ifindex; -}; - #define IN4_ADDR_HSIZE_SHIFT 8 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) @@ -1847,9 +1839,38 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, return 0; } -static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, - struct netlink_callback *cb, int *s_ip_idx, - struct inet_fill_args *fillargs) +static int in_dev_dump_ifmcaddr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) +{ + struct ip_mc_list *im; + int ip_idx = 0; + int err; + + for (im = rcu_dereference(in_dev->mc_list); + im; + im = rcu_dereference(im->next_rcu)) { + if (ip_idx < *s_ip_idx) { + ip_idx++; + continue; + } + err = inet_fill_ifmcaddr(skb, in_dev->dev, im, fillargs); + if (err < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + ip_idx++; + } + err = 0; + ip_idx = 0; +done: + *s_ip_idx = ip_idx; + return err; +} + 
+static int in_dev_dump_ifaddr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) { struct in_ifaddr *ifa; int ip_idx = 0; @@ -1875,6 +1896,21 @@ done: return err; } +static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) +{ + switch (fillargs->event) { + case RTM_NEWADDR: + return in_dev_dump_ifaddr(in_dev, skb, cb, s_ip_idx, fillargs); + case RTM_GETMULTICAST: + return in_dev_dump_ifmcaddr(in_dev, skb, cb, s_ip_idx, + fillargs); + default: + return -EINVAL; + } +} + /* Combine dev_addr_genid and dev_base_seq to detect changes. */ static u32 inet_base_seq(const struct net *net) @@ -1890,13 +1926,14 @@ static u32 inet_base_seq(const struct net *net) return res; } -static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, + int event) { const struct nlmsghdr *nlh = cb->nlh; struct inet_fill_args fillargs = { .portid = NETLINK_CB(cb->skb).portid, .seq = nlh->nlmsg_seq, - .event = RTM_NEWADDR, + .event = event, .flags = NLM_F_MULTI, .netnsid = -1, }; @@ -1950,6 +1987,16 @@ done: return err; } +static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + return inet_dump_addr(skb, cb, RTM_NEWADDR); +} + +static int inet_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + return inet_dump_addr(skb, cb, RTM_GETMULTICAST); +} + static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -2846,6 +2893,8 @@ static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = { {.protocol = PF_INET, .msgtype = RTM_GETNETCONF, .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET, .msgtype = 
RTM_GETMULTICAST, + .dumpit = inet_dump_ifmcaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; void __init devinet_init(void) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 272e42d81323..3f4e629998fa 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -553,18 +553,16 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, const struct in_ifaddr *ifa; struct in_device *in_dev; - in_dev = __in_dev_get_rtnl(dev); + in_dev = __in_dev_get_rtnl_net(dev); if (!in_dev) return -ENODEV; *colon = ':'; - rcu_read_lock(); - in_dev_for_each_ifa_rcu(ifa, in_dev) { + in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) { if (strcmp(ifa->ifa_label, devname) == 0) break; } - rcu_read_unlock(); if (!ifa) return -ENODEV; @@ -635,7 +633,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - rtnl_lock(); + rtnl_net_lock(net); err = rtentry_to_fib_config(net, cmd, rt, &cfg); if (err == 0) { struct fib_table *tb; @@ -659,7 +657,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) /* allocated by rtentry_to_fib_config() */ kfree(cfg.fc_mx); } - rtnl_unlock(); + rtnl_net_unlock(net); return err; } return -EINVAL; @@ -809,7 +807,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, case RTA_MULTIPATH: err = lwtunnel_valid_encap_type_attr(nla_data(attr), nla_len(attr), - extack); + extack, false); if (err < 0) goto errout; cfg->fc_mp = nla_data(attr); @@ -827,7 +825,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, case RTA_ENCAP_TYPE: cfg->fc_encap_type = nla_get_u16(attr); err = lwtunnel_valid_encap_type(cfg->fc_encap_type, - extack); + extack, false); if (err < 0) goto errout; break; @@ -837,19 +835,33 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, } } + if (cfg->fc_dst_len > 32) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + err = -EINVAL; + goto errout; + } + + 
if (cfg->fc_dst_len < 32 && (ntohl(cfg->fc_dst) << cfg->fc_dst_len)) { + NL_SET_ERR_MSG(extack, "Invalid prefix for given prefix length"); + err = -EINVAL; + goto errout; + } + if (cfg->fc_nh_id) { if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_encap || cfg->fc_mp) { NL_SET_ERR_MSG(extack, "Nexthop specification and nexthop id are mutually exclusive"); - return -EINVAL; + err = -EINVAL; + goto errout; } } if (has_gw && has_via) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); - return -EINVAL; + err = -EINVAL; + goto errout; } if (!cfg->fc_table) @@ -872,20 +884,24 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + rtnl_net_lock(net); + if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) { NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); err = -EINVAL; - goto errout; + goto unlock; } tb = fib_get_table(net, cfg.fc_table); if (!tb) { NL_SET_ERR_MSG(extack, "FIB table does not exist"); err = -ESRCH; - goto errout; + goto unlock; } err = fib_table_delete(net, tb, &cfg, extack); +unlock: + rtnl_net_unlock(net); errout: return err; } @@ -902,15 +918,20 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + rtnl_net_lock(net); + tb = fib_new_table(net, cfg.fc_table); if (!tb) { err = -ENOBUFS; - goto errout; + goto unlock; } err = fib_table_insert(net, tb, &cfg, extack); if (!err && cfg.fc_type == RTN_LOCAL) net->ipv4.fib_has_custom_local_routes = true; + +unlock: + rtnl_net_unlock(net); errout: return err; } @@ -1450,7 +1471,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, fib_sync_up(dev, RTNH_F_DEAD); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev)); + rt_cache_flush(net); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); @@ -1461,7 +1482,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, */ fib_disable_ip(dev, event, 
true); } else { - rt_cache_flush(dev_net(dev)); + rt_cache_flush(net); } break; } @@ -1575,7 +1596,7 @@ static void ip_fib_net_exit(struct net *net) { int i; - ASSERT_RTNL(); + ASSERT_RTNL_NET(net); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); @@ -1615,9 +1636,15 @@ static int __net_init fib_net_init(struct net *net) error = ip_fib_net_init(net); if (error < 0) goto out; + + error = fib4_semantics_init(net); + if (error) + goto out_semantics; + error = nl_fib_lookup_init(net); if (error < 0) goto out_nlfl; + error = fib_proc_init(net); if (error < 0) goto out_proc; @@ -1627,9 +1654,11 @@ out: out_proc: nl_fib_lookup_exit(net); out_nlfl: - rtnl_lock(); + fib4_semantics_exit(net); +out_semantics: + rtnl_net_lock(net); ip_fib_net_exit(net); - rtnl_unlock(); + rtnl_net_unlock(net); goto out; } @@ -1644,10 +1673,15 @@ static void __net_exit fib_net_exit_batch(struct list_head *net_list) struct net *net; rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) + list_for_each_entry(net, net_list, exit_list) { + __rtnl_net_lock(net); ip_fib_net_exit(net); - + __rtnl_net_unlock(net); + } rtnl_unlock(); + + list_for_each_entry(net, net_list, exit_list) + fib4_semantics_exit(net); } static struct pernet_operations fib_net_ops = { @@ -1658,9 +1692,9 @@ static struct pernet_operations fib_net_ops = { static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = { {.protocol = PF_INET, .msgtype = RTM_NEWROUTE, - .doit = inet_rtm_newroute}, + .doit = inet_rtm_newroute, .flags = RTNL_FLAG_DOIT_PERNET}, {.protocol = PF_INET, .msgtype = RTM_DELROUTE, - .doit = inet_rtm_delroute}, + .doit = inet_rtm_delroute, .flags = RTNL_FLAG_DOIT_PERNET}, {.protocol = PF_INET, .msgtype = RTM_GETROUTE, .dumpit = inet_dump_fib, .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, }; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 9517b8667e00..fa58d6620ed6 100644 --- 
a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -37,6 +37,7 @@ struct fib4_rule { u8 dst_len; u8 src_len; dscp_t dscp; + dscp_t dscp_mask; u8 dscp_full:1; /* DSCP or TOS selector */ __be32 src; __be32 srcmask; @@ -192,7 +193,8 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, * to mask the upper three DSCP bits prior to matching to maintain * legacy behavior. */ - if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos)) + if (r->dscp_full && + (r->dscp ^ inet_dsfield_to_dscp(fl4->flowi4_tos)) & r->dscp_mask) return 0; else if (!r->dscp_full && r->dscp && !fib_dscp_masked_match(r->dscp, fl4)) @@ -201,12 +203,12 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) return 0; - if (fib_rule_port_range_set(&rule->sport_range) && - !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport)) + if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask, + fl4->fl4_sport)) return 0; - if (fib_rule_port_range_set(&rule->dport_range) && - !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport)) + if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask, + fl4->fl4_dport)) return 0; return 1; @@ -235,19 +237,43 @@ static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4, } rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); + rule4->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK); rule4->dscp_full = true; return 0; } +static int fib4_nl2rule_dscp_mask(const struct nlattr *nla, + struct fib4_rule *rule4, + struct netlink_ext_ack *extack) +{ + dscp_t dscp_mask; + + if (!rule4->dscp_full) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Cannot specify DSCP mask without DSCP value"); + return -EINVAL; + } + + dscp_mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); + if (rule4->dscp & ~dscp_mask) { + NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask"); + return -EINVAL; + } + + rule4->dscp_mask = dscp_mask; + + return 0; +} + static 
int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); + struct fib4_rule *rule4 = (struct fib4_rule *)rule; + struct net *net = rule->fr_net; int err = -EINVAL; - struct fib4_rule *rule4 = (struct fib4_rule *) rule; if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) { NL_SET_ERR_MSG(extack, @@ -271,6 +297,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0) goto errout; + if (tb[FRA_DSCP_MASK] && + fib4_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule4, extack) < 0) + goto errout; + /* split local/main if they are not already split */ err = fib_unmerge(net); if (err) @@ -366,6 +396,14 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, return 0; } + if (tb[FRA_DSCP_MASK]) { + dscp_t dscp_mask; + + dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2); + if (!rule4->dscp_full || rule4->dscp_mask != dscp_mask) + return 0; + } + #ifdef CONFIG_IP_ROUTE_CLASSID if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) return 0; @@ -391,7 +429,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, if (rule4->dscp_full) { frh->tos = 0; if (nla_put_u8(skb, FRA_DSCP, - inet_dscp_to_dsfield(rule4->dscp) >> 2)) + inet_dscp_to_dsfield(rule4->dscp) >> 2) || + nla_put_u8(skb, FRA_DSCP_MASK, + inet_dscp_to_dsfield(rule4->dscp_mask) >> 2)) goto nla_put_failure; } else { frh->tos = inet_dscp_to_dsfield(rule4->dscp); @@ -418,7 +458,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) return nla_total_size(4) /* dst */ + nla_total_size(4) /* src */ + nla_total_size(4) /* flow */ - + nla_total_size(1); /* dscp */ + + nla_total_size(1) /* dscp */ + + nla_total_size(1); /* dscp mask */ } static void fib4_rule_flush_cache(struct fib_rules_ops *ops) diff --git a/net/ipv4/fib_semantics.c 
b/net/ipv4/fib_semantics.c index d2cee5c314f5..f68bb9e34c34 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -50,12 +50,6 @@ #include "fib_lookup.h" -static struct hlist_head *fib_info_hash; -static struct hlist_head *fib_info_laddrhash; -static unsigned int fib_info_hash_size; -static unsigned int fib_info_hash_bits; -static unsigned int fib_info_cnt; - /* for_nexthops and change_nexthops only used when nexthop object * is not set in a fib_info. The logic within can reference fib_nh. */ @@ -258,8 +252,7 @@ void fib_release_info(struct fib_info *fi) ASSERT_RTNL(); if (fi && refcount_dec_and_test(&fi->fib_treeref)) { hlist_del(&fi->fib_hash); - - fib_info_cnt--; + fi->fib_net->ipv4.fib_info_cnt--; if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); @@ -335,11 +328,12 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, static unsigned int fib_info_hashfn_result(const struct net *net, unsigned int val) { - return hash_32(val ^ net_hash_mix(net), fib_info_hash_bits); + return hash_32(val ^ net_hash_mix(net), net->ipv4.fib_info_hash_bits); } -static inline unsigned int fib_info_hashfn(struct fib_info *fi) +static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi) { + struct net *net = fi->fib_net; unsigned int val; val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol, @@ -354,7 +348,70 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi) } endfor_nexthops(fi) } - return fib_info_hashfn_result(fi->fib_net, val); + return &net->ipv4.fib_info_hash[fib_info_hashfn_result(net, val)]; +} + +static struct hlist_head *fib_info_laddrhash_bucket(const struct net *net, + __be32 val) +{ + unsigned int hash_bits = net->ipv4.fib_info_hash_bits; + u32 slot; + + slot = hash_32(net_hash_mix(net) ^ (__force u32)val, hash_bits); + + return &net->ipv4.fib_info_hash[(1 << hash_bits) + slot]; +} + +static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits) +{ + /* The second half is used for prefsrc */ + return 
kvcalloc((1 << hash_bits) * 2, sizeof(struct hlist_head *), + GFP_KERNEL); +} + +static void fib_info_hash_free(struct hlist_head *head) +{ + kvfree(head); +} + +static void fib_info_hash_grow(struct net *net) +{ + unsigned int old_size = 1 << net->ipv4.fib_info_hash_bits; + struct hlist_head *new_info_hash, *old_info_hash; + unsigned int i; + + if (net->ipv4.fib_info_cnt < old_size) + return; + + new_info_hash = fib_info_hash_alloc(net->ipv4.fib_info_hash_bits + 1); + if (!new_info_hash) + return; + + old_info_hash = net->ipv4.fib_info_hash; + net->ipv4.fib_info_hash = new_info_hash; + net->ipv4.fib_info_hash_bits += 1; + + for (i = 0; i < old_size; i++) { + struct hlist_head *head = &old_info_hash[i]; + struct hlist_node *n; + struct fib_info *fi; + + hlist_for_each_entry_safe(fi, n, head, fib_hash) + hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi)); + } + + for (i = 0; i < old_size; i++) { + struct hlist_head *lhead = &old_info_hash[old_size + i]; + struct hlist_node *n; + struct fib_info *fi; + + hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) + hlist_add_head(&fi->fib_lhash, + fib_info_laddrhash_bucket(fi->fib_net, + fi->fib_prefsrc)); + } + + fib_info_hash_free(old_info_hash); } /* no metrics, only nexthop id */ @@ -370,13 +427,12 @@ static struct fib_info *fib_find_info_nh(struct net *net, (__force u32)cfg->fc_prefsrc, cfg->fc_priority); hash = fib_info_hashfn_result(net, hash); - head = &fib_info_hash[hash]; + head = &net->ipv4.fib_info_hash[hash]; hlist_for_each_entry(fi, head, fib_hash) { - if (!net_eq(fi->fib_net, net)) - continue; if (!fi->nh || fi->nh->id != cfg->fc_nh_id) continue; + if (cfg->fc_protocol == fi->fib_protocol && cfg->fc_scope == fi->fib_scope && cfg->fc_prefsrc == fi->fib_prefsrc && @@ -392,18 +448,13 @@ static struct fib_info *fib_find_info_nh(struct net *net, static struct fib_info *fib_find_info(struct fib_info *nfi) { - struct hlist_head *head; + struct hlist_head *head = fib_info_hash_bucket(nfi); struct fib_info *fi; - 
unsigned int hash; - - hash = fib_info_hashfn(nfi); - head = &fib_info_hash[hash]; hlist_for_each_entry(fi, head, fib_hash) { - if (!net_eq(fi->fib_net, nfi->fib_net)) - continue; if (fi->fib_nhs != nfi->fib_nhs) continue; + if (nfi->fib_protocol == fi->fib_protocol && nfi->fib_scope == fi->fib_scope && nfi->fib_prefsrc == fi->fib_prefsrc && @@ -1239,64 +1290,6 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, return err; } -static struct hlist_head * -fib_info_laddrhash_bucket(const struct net *net, __be32 val) -{ - u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val, - fib_info_hash_bits); - - return &fib_info_laddrhash[slot]; -} - -static void fib_info_hash_move(struct hlist_head *new_info_hash, - struct hlist_head *new_laddrhash, - unsigned int new_size) -{ - struct hlist_head *old_info_hash, *old_laddrhash; - unsigned int old_size = fib_info_hash_size; - unsigned int i; - - ASSERT_RTNL(); - old_info_hash = fib_info_hash; - old_laddrhash = fib_info_laddrhash; - fib_info_hash_size = new_size; - fib_info_hash_bits = ilog2(new_size); - - for (i = 0; i < old_size; i++) { - struct hlist_head *head = &fib_info_hash[i]; - struct hlist_node *n; - struct fib_info *fi; - - hlist_for_each_entry_safe(fi, n, head, fib_hash) { - struct hlist_head *dest; - unsigned int new_hash; - - new_hash = fib_info_hashfn(fi); - dest = &new_info_hash[new_hash]; - hlist_add_head(&fi->fib_hash, dest); - } - } - fib_info_hash = new_info_hash; - - fib_info_laddrhash = new_laddrhash; - for (i = 0; i < old_size; i++) { - struct hlist_head *lhead = &old_laddrhash[i]; - struct hlist_node *n; - struct fib_info *fi; - - hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { - struct hlist_head *ldest; - - ldest = fib_info_laddrhash_bucket(fi->fib_net, - fi->fib_prefsrc); - hlist_add_head(&fi->fib_lhash, ldest); - } - } - - kvfree(old_info_hash); - kvfree(old_laddrhash); -} - __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, unsigned char 
scope) { @@ -1409,32 +1402,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg, } #endif - err = -ENOBUFS; - - if (fib_info_cnt >= fib_info_hash_size) { - unsigned int new_size = fib_info_hash_size << 1; - struct hlist_head *new_info_hash; - struct hlist_head *new_laddrhash; - size_t bytes; - - if (!new_size) - new_size = 16; - bytes = (size_t)new_size * sizeof(struct hlist_head *); - new_info_hash = kvzalloc(bytes, GFP_KERNEL); - new_laddrhash = kvzalloc(bytes, GFP_KERNEL); - if (!new_info_hash || !new_laddrhash) { - kvfree(new_info_hash); - kvfree(new_laddrhash); - } else { - fib_info_hash_move(new_info_hash, new_laddrhash, new_size); - } - if (!fib_info_hash_size) - goto failure; - } + fib_info_hash_grow(net); fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); - if (!fi) + if (!fi) { + err = -ENOBUFS; goto failure; + } + fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(fi->fib_metrics)) { err = PTR_ERR(fi->fib_metrics); @@ -1571,9 +1546,9 @@ link_it: refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); - fib_info_cnt++; - hlist_add_head(&fi->fib_hash, - &fib_info_hash[fib_info_hashfn(fi)]); + net->ipv4.fib_info_cnt++; + hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi)); + if (fi->fib_prefsrc) { struct hlist_head *head; @@ -1855,7 +1830,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) struct fib_info *fi; int ret = 0; - if (!fib_info_laddrhash || local == 0) + if (!local) return 0; head = fib_info_laddrhash_bucket(net, local); @@ -2257,3 +2232,22 @@ check_saddr: fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK); } } + +int __net_init fib4_semantics_init(struct net *net) +{ + unsigned int hash_bits = 4; + + net->ipv4.fib_info_hash = fib_info_hash_alloc(hash_bits); + if (!net->ipv4.fib_info_hash) + return -ENOMEM; + + net->ipv4.fib_info_hash_bits = hash_bits; + net->ipv4.fib_info_cnt = 0; + + return 0; +} + +void __net_exit fib4_semantics_exit(struct net *net) +{ + 
fib_info_hash_free(net->ipv4.fib_info_hash); +} diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d6411ac81096..59a6f0a9638f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1187,22 +1187,6 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, return 0; } -static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack) -{ - if (plen > KEYLENGTH) { - NL_SET_ERR_MSG(extack, "Invalid prefix length"); - return false; - } - - if ((plen < KEYLENGTH) && (key << plen)) { - NL_SET_ERR_MSG(extack, - "Invalid prefix for given prefix length"); - return false; - } - - return true; -} - static void fib_remove_alias(struct trie *t, struct key_vector *tp, struct key_vector *l, struct fib_alias *old); @@ -1223,9 +1207,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, key = ntohl(cfg->fc_dst); - if (!fib_valid_key_len(key, plen, extack)) - return -EINVAL; - pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); fi = fib_create_info(cfg, extack); @@ -1717,9 +1698,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb, key = ntohl(cfg->fc_dst); - if (!fib_valid_key_len(key, plen, extack)) - return -EINVAL; - l = fib_find_node(t, &tp, key); if (!l) return -ESRCH; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5482edb5aade..717cb7d3607a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -405,7 +405,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct flowi4 fl4; struct sock *sk; - struct inet_sock *inet; __be32 daddr, saddr; u32 mark = IP4_REPLY_MARK(net, skb->mark); int type = icmp_param->data.icmph.type; @@ -424,12 +423,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) sk = icmp_xmit_lock(net); if (!sk) goto out_bh_enable; - inet = inet_sk(sk); icmp_param->data.icmph.checksum = 0; ipcm_init(&ipc); - inet->tos = ip_hdr(skb)->tos; + ipc.tos = ip_hdr(skb)->tos; ipc.sockc.mark = mark; daddr = ipc.addr = 
ip_hdr(skb)->saddr; saddr = fib_compute_spec_dst(skb); @@ -737,8 +735,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, icmp_param.data.icmph.checksum = 0; icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); - inet_sk(sk)->tos = tos; ipcm_init(&ipc); + ipc.tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; ipc.sockc.mark = mark; @@ -1250,22 +1248,6 @@ int icmp_rcv(struct sk_buff *skb) goto reason_check; } - if (icmph->type == ICMP_EXT_ECHOREPLY) { - reason = ping_rcv(skb); - goto reason_check; - } - - /* - * 18 is the highest 'known' ICMP type. Anything else is a mystery - * - * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently - * discarded. - */ - if (icmph->type > NR_ICMP_TYPES) { - reason = SKB_DROP_REASON_UNHANDLED_PROTO; - goto error; - } - /* * Parse the ICMP message */ @@ -1292,6 +1274,23 @@ int icmp_rcv(struct sk_buff *skb) } } + if (icmph->type == ICMP_EXT_ECHOREPLY || + icmph->type == ICMP_ECHOREPLY) { + reason = ping_rcv(skb); + return reason ? NET_RX_DROP : NET_RX_SUCCESS; + } + + /* + * 18 is the highest 'known' ICMP type. Anything else is a mystery + * + * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently + * discarded. 
+ */ + if (icmph->type > NR_ICMP_TYPES) { + reason = SKB_DROP_REASON_UNHANDLED_PROTO; + goto error; + } + reason = icmp_pointers[icmph->type].handler(skb); reason_check: if (!reason) { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3da126cea884..2c394c364cb9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -81,6 +81,7 @@ #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/igmp.h> +#include "igmp_internal.h" #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/times.h> @@ -1432,14 +1433,16 @@ static void ip_mc_hash_remove(struct in_device *in_dev, *mc_hash = im->next_hash; } -static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, - const struct ip_mc_list *im, int event) +int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, + struct inet_fill_args *args) { struct ifa_cacheinfo ci; struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + nlh = nlmsg_put(skb, args->portid, args->seq, args->event, + sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; @@ -1468,6 +1471,9 @@ static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, static void inet_ifmcaddr_notify(struct net_device *dev, const struct ip_mc_list *im, int event) { + struct inet_fill_args fillargs = { + .event = event, + }; struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOMEM; @@ -1479,7 +1485,7 @@ static void inet_ifmcaddr_notify(struct net_device *dev, if (!skb) goto error; - err = inet_fill_ifmcaddr(skb, dev, im, event); + err = inet_fill_ifmcaddr(skb, dev, im, &fillargs); if (err < 0) { WARN_ON_ONCE(err == -EMSGSIZE); nlmsg_free(skb); diff --git a/net/ipv4/igmp_internal.h b/net/ipv4/igmp_internal.h new file mode 100644 index 000000000000..0a1bcc8ec8e1 --- /dev/null +++ b/net/ipv4/igmp_internal.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef 
_LINUX_IGMP_INTERNAL_H +#define _LINUX_IGMP_INTERNAL_H + +struct inet_fill_args { + u32 portid; + u32 seq; + int event; + unsigned int flags; + int netnsid; + int ifindex; +}; + +int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, + struct inet_fill_args *args); +#endif diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e4decfb270fa..dd5cf8914a28 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -157,12 +157,10 @@ static bool inet_use_bhash2_on_bind(const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { - int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); - - if (addr_type == IPV6_ADDR_ANY) + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) return false; - if (addr_type != IPV6_ADDR_MAPPED) + if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) return true; } #endif @@ -600,7 +598,7 @@ fail_unlock: if (bhash2_created) inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, tb2); if (bhash_created) - inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); } if (head2_lock_acquired) spin_unlock(&head2->lock); @@ -799,18 +797,6 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk) sk_stop_timer_sync(sk, &sk->sk_timer); } -void inet_csk_delete_keepalive_timer(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); -} -EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); - -void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) -{ - sk_reset_timer(sk, &sk->sk_timer, jiffies + len); -} -EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); - struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, const struct request_sock *req) @@ -1249,39 +1235,59 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, const gfp_t priority) { struct sock *newsk = sk_clone_lock(sk, priority); + struct inet_connection_sock *newicsk; + struct inet_request_sock *ireq; + struct inet_sock 
*newinet; - if (newsk) { - struct inet_connection_sock *newicsk = inet_csk(newsk); + if (!newsk) + return NULL; - inet_sk_set_state(newsk, TCP_SYN_RECV); - newicsk->icsk_bind_hash = NULL; - newicsk->icsk_bind2_hash = NULL; + newicsk = inet_csk(newsk); + newinet = inet_sk(newsk); + ireq = inet_rsk(req); - inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; - inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; - inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); + newicsk->icsk_bind_hash = NULL; + newicsk->icsk_bind2_hash = NULL; - /* listeners have SOCK_RCU_FREE, not the children */ - sock_reset_flag(newsk, SOCK_RCU_FREE); + newinet->inet_dport = ireq->ir_rmt_port; + newinet->inet_num = ireq->ir_num; + newinet->inet_sport = htons(ireq->ir_num); - inet_sk(newsk)->mc_list = NULL; + newsk->sk_bound_dev_if = ireq->ir_iif; - newsk->sk_mark = inet_rsk(req)->ir_mark; - atomic64_set(&newsk->sk_cookie, - atomic64_read(&inet_rsk(req)->ir_cookie)); + newsk->sk_daddr = ireq->ir_rmt_addr; + newsk->sk_rcv_saddr = ireq->ir_loc_addr; + newinet->inet_saddr = ireq->ir_loc_addr; - newicsk->icsk_retransmits = 0; - newicsk->icsk_backoff = 0; - newicsk->icsk_probes_out = 0; - newicsk->icsk_probes_tstamp = 0; +#if IS_ENABLED(CONFIG_IPV6) + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; +#endif - /* Deinitialize accept_queue to trap illegal accesses. */ - memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + /* listeners have SOCK_RCU_FREE, not the children */ + sock_reset_flag(newsk, SOCK_RCU_FREE); - inet_clone_ulp(req, newsk, priority); + inet_sk(newsk)->mc_list = NULL; + + newsk->sk_mark = inet_rsk(req)->ir_mark; + atomic64_set(&newsk->sk_cookie, + atomic64_read(&inet_rsk(req)->ir_cookie)); + + newicsk->icsk_retransmits = 0; + newicsk->icsk_backoff = 0; + newicsk->icsk_probes_out = 0; + newicsk->icsk_probes_tstamp = 0; + + /* Deinitialize accept_queue to trap illegal accesses. 
*/ + memset(&newicsk->icsk_accept_queue, 0, + sizeof(newicsk->icsk_accept_queue)); + + inet_sk_set_state(newsk, TCP_SYN_RECV); + + inet_clone_ulp(req, newsk, priority); + + security_inet_csk_clone(newsk, req); - security_inet_csk_clone(newsk, req); - } return newsk; } EXPORT_SYMBOL_GPL(inet_csk_clone_lock); @@ -1547,17 +1553,6 @@ skip_child_forget: } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); -void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) -{ - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; - const struct inet_sock *inet = inet_sk(sk); - - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = inet->inet_daddr; - sin->sin_port = inet->inet_dport; -} -EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); - static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) { const struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 321acc8abf17..c2bb91d9e9ff 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -282,7 +282,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), - .idiag_fmem = sk_forward_alloc_get(sk), + .idiag_fmem = READ_ONCE(sk->sk_forward_alloc), .idiag_tmem = sk_wmem_alloc_get(sk), }; @@ -315,12 +315,12 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = - jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); + jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies); } else if (icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = - jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); + jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies); } else if (timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; r->idiag_retrans = icsk->icsk_probes_out; diff --git 
a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index d179a2c84222..19fae4811ab2 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -145,8 +145,7 @@ static void inet_frags_free_cb(void *ptr, void *arg) } spin_unlock_bh(&fq->lock); - if (refcount_sub_and_test(count, &fq->refcnt)) - inet_frag_destroy(fq); + inet_frag_putn(fq, count); } static LLIST_HEAD(fqdir_free_list); @@ -226,10 +225,10 @@ void fqdir_exit(struct fqdir *fqdir) } EXPORT_SYMBOL(fqdir_exit); -void inet_frag_kill(struct inet_frag_queue *fq) +void inet_frag_kill(struct inet_frag_queue *fq, int *refs) { if (del_timer(&fq->timer)) - refcount_dec(&fq->refcnt); + (*refs)++; if (!(fq->flags & INET_FRAG_COMPLETE)) { struct fqdir *fqdir = fq->fqdir; @@ -244,7 +243,7 @@ void inet_frag_kill(struct inet_frag_queue *fq) if (!READ_ONCE(fqdir->dead)) { rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, fqdir->f->rhash_params); - refcount_dec(&fq->refcnt); + (*refs)++; } else { fq->flags |= INET_FRAG_HASH_DEAD; } @@ -328,7 +327,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); - refcount_set(&q->refcnt, 3); + /* One reference for the timer, one for the hash table. */ + refcount_set(&q->refcnt, 2); return q; } @@ -350,15 +350,20 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key, &q->node, f->rhash_params); if (*prev) { + /* We could not insert in the hash table, + * we need to cancel what inet_frag_alloc() + * anticipated. + */ + int refs = 1; + q->flags |= INET_FRAG_COMPLETE; - inet_frag_kill(q); - inet_frag_destroy(q); + inet_frag_kill(q, &refs); + inet_frag_putn(q, refs); return NULL; } return q; } -/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). 
*/ @@ -368,17 +373,11 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; - rcu_read_lock(); - prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params); if (!prev) fq = inet_frag_create(fqdir, key, &prev); - if (!IS_ERR_OR_NULL(prev)) { + if (!IS_ERR_OR_NULL(prev)) fq = prev; - if (!refcount_inc_not_zero(&fq->refcnt)) - fq = NULL; - } - rcu_read_unlock(); return fq; } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9bfcfd016e18..5bf163f756e9 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -35,8 +35,8 @@ u32 inet_ehashfn(const struct net *net, const __be32 laddr, { net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); - return __inet_ehashfn(laddr, lport, faddr, fport, - inet_ehash_secret + net_hash_mix(net)); + return lport + __inet_ehashfn(laddr, 0, faddr, fport, + inet_ehash_secret + net_hash_mix(net)); } EXPORT_SYMBOL_GPL(inet_ehashfn); @@ -76,7 +76,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, tb->fastreuse = 0; tb->fastreuseport = 0; INIT_HLIST_HEAD(&tb->bhash2); - hlist_add_head(&tb->node, &head->chain); + hlist_add_head_rcu(&tb->node, &head->chain); } return tb; } @@ -84,11 +84,11 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, /* * Caller must hold hashbucket lock for this tb with local BH disabled */ -void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) +void inet_bind_bucket_destroy(struct inet_bind_bucket *tb) { if (hlist_empty(&tb->bhash2)) { - __hlist_del(&tb->node); - kmem_cache_free(cachep, tb); + hlist_del_rcu(&tb->node); + kfree_rcu(tb, rcu); } } @@ -201,7 +201,7 @@ static void __inet_put_port(struct sock *sk) } spin_unlock(&head2->lock); - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); 
spin_unlock(&head->lock); } @@ -285,7 +285,7 @@ bhash2_find: error: if (created_inet_bind_bucket) - inet_bind_bucket_destroy(table->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); spin_unlock(&head2->lock); spin_unlock(&head->lock); return -ENOMEM; @@ -537,7 +537,9 @@ EXPORT_SYMBOL_GPL(__inet_lookup_established); /* called with local bh disabled */ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, __u16 lport, - struct inet_timewait_sock **twp) + struct inet_timewait_sock **twp, + bool rcu_lookup, + u32 hash) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); @@ -548,14 +550,25 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, int sdif = l3mdev_master_ifindex_by_index(net, dif); INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); - unsigned int hash = inet_ehashfn(net, daddr, lport, - saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - spinlock_t *lock = inet_ehash_lockp(hinfo, hash); - struct sock *sk2; - const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; + const struct hlist_nulls_node *node; + struct sock *sk2; + spinlock_t *lock; + if (rcu_lookup) { + sk_nulls_for_each(sk2, node, &head->chain) { + if (sk2->sk_hash != hash || + !inet_match(net, sk2, acookie, ports, dif, sdif)) + continue; + if (sk2->sk_state == TCP_TIME_WAIT) + break; + return -EADDRNOTAVAIL; + } + return 0; + } + + lock = inet_ehash_lockp(hinfo, hash); spin_lock(lock); sk_nulls_for_each(sk2, node, &head->chain) { @@ -993,8 +1006,10 @@ static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, + u32 hash_port0, int (*check_established)(struct inet_timewait_death_row *, - struct sock *, __u16, struct inet_timewait_sock **)) + struct sock *, __u16, struct inet_timewait_sock **, + 
bool rcu_lookup, u32 hash)) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_bind_hashbucket *head, *head2; @@ -1012,7 +1027,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (port) { local_bh_disable(); - ret = check_established(death_row, sk, port, NULL); + ret = check_established(death_row, sk, port, NULL, false, + hash_port0 + port); local_bh_enable(); return ret; } @@ -1048,6 +1064,22 @@ other_parity_scan: continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; + rcu_read_lock(); + hlist_for_each_entry_rcu(tb, &head->chain, node) { + if (!inet_bind_bucket_match(tb, net, port, l3mdev)) + continue; + if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) { + rcu_read_unlock(); + goto next_port; + } + if (!check_established(death_row, sk, port, &tw, true, + hash_port0 + port)) + break; + rcu_read_unlock(); + goto next_port; + } + rcu_read_unlock(); + spin_lock_bh(&head->lock); /* Does not bother with rcv_saddr checks, because @@ -1057,12 +1089,13 @@ other_parity_scan: if (inet_bind_bucket_match(tb, net, port, l3mdev)) { if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) - goto next_port; + goto next_port_unlock; WARN_ON(hlist_empty(&tb->bhash2)); if (!check_established(death_row, sk, - port, &tw)) + port, &tw, false, + hash_port0 + port)) goto ok; - goto next_port; + goto next_port_unlock; } } @@ -1076,8 +1109,9 @@ other_parity_scan: tb->fastreuse = -1; tb->fastreuseport = -1; goto ok; -next_port: +next_port_unlock: spin_unlock_bh(&head->lock); +next_port: cond_resched(); } @@ -1149,7 +1183,7 @@ error: spin_unlock(&head2->lock); if (tb_created) - inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); spin_unlock(&head->lock); if (tw) @@ -1166,11 +1200,18 @@ error: int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { + const struct inet_sock *inet = inet_sk(sk); + const struct net *net = sock_net(sk); u64 port_offset = 0; + u32 hash_port0; if 
(!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); - return __inet_hash_connect(death_row, sk, port_offset, + + hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0, + inet->inet_daddr, inet->inet_dport); + + return __inet_hash_connect(death_row, sk, port_offset, hash_port0, __inet_check_established); } EXPORT_SYMBOL_GPL(inet_hash_connect); @@ -1230,22 +1271,37 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { unsigned int locksz = sizeof(spinlock_t); unsigned int i, nblocks = 1; + spinlock_t *ptr = NULL; - if (locksz != 0) { - /* allocate 2 cache lines or at least one spinlock per cpu */ - nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U); - nblocks = roundup_pow_of_two(nblocks * num_possible_cpus()); + if (locksz == 0) + goto set_mask; - /* no more locks than number of hash buckets */ - nblocks = min(nblocks, hashinfo->ehash_mask + 1); + /* Allocate 2 cache lines or at least one spinlock per cpu. */ + nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U) * num_possible_cpus(); - hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL); - if (!hashinfo->ehash_locks) - return -ENOMEM; + /* At least one page per NUMA node. */ + nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz); + + nblocks = roundup_pow_of_two(nblocks); + + /* No more locks than number of hash buckets. */ + nblocks = min(nblocks, hashinfo->ehash_mask + 1); - for (i = 0; i < nblocks; i++) - spin_lock_init(&hashinfo->ehash_locks[i]); + if (num_online_nodes() > 1) { + /* Use vmalloc() to allow NUMA policy to spread pages + * on all available nodes if desired. 
+ */ + ptr = vmalloc_array(nblocks, locksz); + } + if (!ptr) { + ptr = kvmalloc_array(nblocks, locksz, GFP_KERNEL); + if (!ptr) + return -ENOMEM; } + for (i = 0; i < nblocks; i++) + spin_lock_init(&ptr[i]); + hashinfo->ehash_locks = ptr; +set_mask: hashinfo->ehash_locks_mask = nblocks - 1; return 0; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 337390ba85b4..aded4bf1bc16 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -39,7 +39,7 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, tw->tw_tb = NULL; tw->tw_tb2 = NULL; inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); __sock_put((struct sock *)tw); } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index b8b23a77ceb4..7b1e0a2d6906 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -60,7 +60,7 @@ void inet_peer_base_init(struct inet_peer_base *bp) seqlock_init(&bp->lock); bp->total = 0; } -EXPORT_SYMBOL_GPL(inet_peer_base_init); +EXPORT_IPV6_MOD_GPL(inet_peer_base_init); #define PEER_MAX_GC 32 @@ -218,7 +218,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, return p; } -EXPORT_SYMBOL_GPL(inet_getpeer); +EXPORT_IPV6_MOD_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { @@ -269,7 +269,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) WRITE_ONCE(peer->rate_tokens, token); return rc; } -EXPORT_SYMBOL(inet_peer_xrlim_allow); +EXPORT_IPV6_MOD(inet_peer_xrlim_allow); void inetpeer_invalidate_tree(struct inet_peer_base *base) { @@ -286,4 +286,4 @@ void inetpeer_invalidate_tree(struct inet_peer_base *base) base->total = 0; } -EXPORT_SYMBOL(inetpeer_invalidate_tree); +EXPORT_IPV6_MOD(inetpeer_invalidate_tree); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7a435746a22d..77f395b28ec7 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -76,7 +76,8 @@ 
static u8 ip4_frag_ecn(u8 tos) static struct inet_frags ip4_frags; static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev); + struct sk_buff *prev_tail, struct net_device *dev, + int *refs); static void ip4_frag_init(struct inet_frag_queue *q, const void *a) @@ -107,22 +108,6 @@ static void ip4_frag_free(struct inet_frag_queue *q) inet_putpeer(qp->peer); } - -/* Destruction primitives. */ - -static void ipq_put(struct ipq *ipq) -{ - inet_frag_put(&ipq->q); -} - -/* Kill ipq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static void ipq_kill(struct ipq *ipq) -{ - inet_frag_kill(&ipq->q); -} - static bool frag_expire_skip_icmp(u32 user) { return user == IP_DEFRAG_AF_PACKET || @@ -143,6 +128,7 @@ static void ip_expire(struct timer_list *t) struct sk_buff *head = NULL; struct net *net; struct ipq *qp; + int refs = 1; qp = container_of(frag, struct ipq, q); net = qp->q.fqdir->net; @@ -159,7 +145,7 @@ static void ip_expire(struct timer_list *t) goto out; qp->q.flags |= INET_FRAG_DROP; - ipq_kill(qp); + inet_frag_kill(&qp->q, &refs); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); @@ -202,7 +188,7 @@ out: out_rcu_unlock: rcu_read_unlock(); kfree_skb_reason(head, reason); - ipq_put(qp); + inet_frag_putn(&qp->q, refs); } /* Find the correct entry in the "incomplete datagrams" queue for @@ -278,7 +264,7 @@ static int ip_frag_reinit(struct ipq *qp) } /* Add new segment to existing queue. 
*/ -static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) +static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb, int *refs) { struct net *net = qp->q.fqdir->net; int ihl, end, flags, offset; @@ -298,7 +284,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && unlikely(ip_frag_too_far(qp)) && unlikely(err = ip_frag_reinit(qp))) { - ipq_kill(qp); + inet_frag_kill(&qp->q, refs); goto err; } @@ -382,10 +368,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = ip_frag_reasm(qp, skb, prev_tail, dev); + err = ip_frag_reasm(qp, skb, prev_tail, dev, refs); skb->_skb_refdst = orefdst; if (err) - inet_frag_kill(&qp->q); + inet_frag_kill(&qp->q, refs); return err; } @@ -402,7 +388,7 @@ insert_error: err = -EINVAL; __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); discard_qp: - inet_frag_kill(&qp->q); + inet_frag_kill(&qp->q, refs); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); err: kfree_skb_reason(skb, reason); @@ -416,7 +402,8 @@ static bool ip_frag_coalesce_ok(const struct ipq *qp) /* Build a new IP datagram from all its fragments. 
*/ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev) + struct sk_buff *prev_tail, struct net_device *dev, + int *refs) { struct net *net = qp->q.fqdir->net; struct iphdr *iph; @@ -424,7 +411,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, int len, err; u8 ecn; - ipq_kill(qp); + inet_frag_kill(&qp->q, refs); ecn = ip_frag_ecn_table[qp->ecn]; if (unlikely(ecn == 0xff)) { @@ -496,18 +483,21 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ + rcu_read_lock(); qp = ip_find(net, ip_hdr(skb), user, vif); if (qp) { - int ret; + int ret, refs = 0; spin_lock(&qp->q.lock); - ret = ip_frag_queue(qp, skb); + ret = ip_frag_queue(qp, skb, &refs); spin_unlock(&qp->q.lock); - ipq_put(qp); + rcu_read_unlock(); + inet_frag_putn(&qp->q, refs); return ret; } + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ed1b6b44faf8..26d15f907551 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -141,7 +141,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info, const struct iphdr *iph; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; - unsigned int data_len = 0; struct ip_tunnel *t; if (tpi->proto == htons(ETH_P_TEB)) @@ -182,7 +181,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info, case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return 0; - data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: @@ -190,10 +188,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info, } #if IS_ENABLED(CONFIG_IPV6) - if (tpi->proto == htons(ETH_P_IPV6) && - !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, - type, data_len)) - return 0; + if (tpi->proto == htons(ETH_P_IPV6)) { + unsigned int data_len = 0; + + if (type == ICMP_TIME_EXCEEDED) + data_len = 
icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ + + if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, + type, data_len)) + return 0; + } #endif if (t->parms.iph.daddr == 0 || @@ -1392,10 +1396,12 @@ ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[]) return 0; } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int ipgre_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; struct ip_tunnel_parm_kern p; __u32 fwmark = 0; int err; @@ -1407,13 +1413,16 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; - return ip_tunnel_newlink(dev, tb, &p, fwmark); + return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p, + fwmark); } -static int erspan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int erspan_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; struct ip_tunnel_parm_kern p; __u32 fwmark = 0; int err; @@ -1425,7 +1434,8 @@ static int erspan_newlink(struct net *src_net, struct net_device *dev, err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); if (err) return err; - return ip_tunnel_newlink(dev, tb, &p, fwmark); + return ip_tunnel_newlink(params->link_net ? 
: dev_net(dev), dev, tb, &p, + fwmark); } static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], @@ -1693,6 +1703,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = { struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type) { + struct rtnl_newlink_params params = { .src_net = net }; struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; LIST_HEAD(list_kill); @@ -1700,6 +1711,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, int err; memset(&tb, 0, sizeof(tb)); + params.tb = tb; dev = rtnl_create_link(net, name, name_assign_type, &ipgre_tap_ops, tb, NULL); @@ -1710,7 +1722,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, t = netdev_priv(dev); t->collect_md = true; - err = ipgre_newlink(net, dev, tb, NULL, NULL); + err = ipgre_newlink(dev, ¶ms, NULL); if (err < 0) { free_netdev(dev); return ERR_PTR(err); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ea7a260bec8a..6e18d7ec5062 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -75,7 +75,6 @@ #include <net/checksum.h> #include <net/gso.h> #include <net/inetpeer.h> -#include <net/inet_ecn.h> #include <net/lwtunnel.h> #include <net/inet_dscp.h> #include <linux/bpf-cgroup.h> @@ -1640,7 +1639,7 @@ void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, if (IS_ERR(rt)) return; - inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK; + inet_sk(sk)->tos = arg->tos; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 09b73acf037a..1024f961ec9a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -40,6 +40,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <net/rtnetlink.h> #include <net/udp.h> #include <net/dst_metadata.h> @@ -1162,7 +1163,7 @@ int ip_tunnel_init_net(struct net 
*net, unsigned int ip_tnl_net_id, * Allowing to move it to another netns is clearly unsafe. */ if (!IS_ERR(itn->fb_tunnel_dev)) { - itn->fb_tunnel_dev->netns_local = true; + itn->fb_tunnel_dev->netns_immutable = true; itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); itn->type = itn->fb_tunnel_dev->type; @@ -1213,11 +1214,11 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, } EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); -int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm_kern *p, __u32 fwmark) +int ip_tunnel_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct ip_tunnel_parm_kern *p, + __u32 fwmark) { struct ip_tunnel *nt; - struct net *net = dev_net(dev); struct ip_tunnel_net *itn; int mtu; int err; @@ -1326,7 +1327,6 @@ int ip_tunnel_init(struct net_device *dev) } tunnel->dev = dev; - tunnel->net = dev_net(dev); strscpy(tunnel->parms.name, dev->name); iph->version = 4; iph->ihl = 5; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index f0b4419cef34..159b4473290e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -575,15 +575,18 @@ static void vti_netlink_parms(struct nlattr *data[], *fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]); } -static int vti_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int vti_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct nlattr **data = params->data; struct ip_tunnel_parm_kern parms; + struct nlattr **tb = params->tb; __u32 fwmark = 0; vti_netlink_parms(data, &parms, &fwmark); - return ip_tunnel_newlink(dev, tb, &parms, fwmark); + return ip_tunnel_newlink(params->link_net ? 
: dev_net(dev), dev, tb, + &parms, fwmark); } static int vti_changelink(struct net_device *dev, struct nlattr *tb[], diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index dc0db5895e0e..bab0bf90c908 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -436,11 +436,13 @@ static void ipip_netlink_parms(struct nlattr *data[], *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } -static int ipip_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int ipip_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; struct ip_tunnel_encap ipencap; struct ip_tunnel_parm_kern p; __u32 fwmark = 0; @@ -453,7 +455,8 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev, } ipip_netlink_parms(data, &p, &t->collect_md, &fwmark); - return ip_tunnel_newlink(dev, tb, &p, fwmark); + return ip_tunnel_newlink(params->link_net ? 
: dev_net(dev), dev, tb, &p, + fwmark); } static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 21ae7594a852..b81c8131e23f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -563,7 +563,7 @@ static void reg_vif_setup(struct net_device *dev) dev->flags = IFF_NOARP; dev->netdev_ops = ®_vif_netdev_ops; dev->needs_free_netdev = true; - dev->netns_local = true; + dev->netns_immutable = true; } static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 625adbc42037..9082ca17e845 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -71,6 +71,11 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct net_device *oif; const struct net_device *found; + if (nft_fib_can_skip(pkt)) { + nft_fib_store_result(dest, priv, nft_in(pkt)); + return; + } + /* * Do not set flowi4_oif, it restricts results (for example, asking * for oif 3 will get RTN_UNICAST result even if the daddr exits @@ -85,12 +90,6 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, else oif = NULL; - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && - nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv, nft_in(pkt)); - return; - } - iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 09a3d73b45ba..467151517023 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1272,10 +1272,8 @@ static int nh_check_attr_group(struct net *net, u16 nh_grp_type, struct netlink_ext_ack *extack) { unsigned int len = nla_len(tb[NHA_GROUP]); - u8 nh_family = AF_UNSPEC; struct nexthop_grp *nhg; unsigned int i, j; - u8 nhg_fdb = 0; if (!len || len & (sizeof(struct nexthop_grp) - 1)) { NL_SET_ERR_MSG(extack, @@ -1307,10 +1305,41 
@@ static int nh_check_attr_group(struct net *net, } } - if (tb[NHA_FDB]) - nhg_fdb = 1; nhg = nla_data(tb[NHA_GROUP]); - for (i = 0; i < len; ++i) { + for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { + if (!tb[i]) + continue; + switch (i) { + case NHA_HW_STATS_ENABLE: + case NHA_FDB: + continue; + case NHA_RES_GROUP: + if (nh_grp_type == NEXTHOP_GRP_TYPE_RES) + continue; + break; + } + NL_SET_ERR_MSG(extack, + "No other attributes can be set in nexthop groups"); + return -EINVAL; + } + + return 0; +} + +static int nh_check_attr_group_rtnl(struct net *net, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + u8 nh_family = AF_UNSPEC; + struct nexthop_grp *nhg; + unsigned int len; + unsigned int i; + u8 nhg_fdb; + + len = nla_len(tb[NHA_GROUP]) / sizeof(*nhg); + nhg = nla_data(tb[NHA_GROUP]); + nhg_fdb = !!tb[NHA_FDB]; + + for (i = 0; i < len; i++) { struct nexthop *nh; bool is_fdb_nh; @@ -1330,22 +1359,6 @@ static int nh_check_attr_group(struct net *net, return -EINVAL; } } - for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { - if (!tb[i]) - continue; - switch (i) { - case NHA_HW_STATS_ENABLE: - case NHA_FDB: - continue; - case NHA_RES_GROUP: - if (nh_grp_type == NEXTHOP_GRP_TYPE_RES) - continue; - break; - } - NL_SET_ERR_MSG(extack, - "No other attributes can be set in nexthop groups"); - return -EINVAL; - } return 0; } @@ -2679,9 +2692,6 @@ static struct nexthop *nexthop_create_group(struct net *net, int err; int i; - if (WARN_ON(!num_nh)) - return ERR_PTR(-EINVAL); - nh = nexthop_alloc(); if (!nh) return ERR_PTR(-ENOMEM); @@ -2915,11 +2925,6 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg, struct nexthop *nh; int err; - if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) { - NL_SET_ERR_MSG(extack, "Replace requires nexthop id"); - return ERR_PTR(-EINVAL); - } - if (!cfg->nh_id) { cfg->nh_id = nh_find_unused_id(net); if (!cfg->nh_id) { @@ -3016,19 +3021,13 @@ static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config 
*cfg, } static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nh_config *cfg, + struct nlmsghdr *nlh, struct nlattr **tb, + struct nh_config *cfg, struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)]; int err; - err = nlmsg_parse(nlh, sizeof(*nhm), tb, - ARRAY_SIZE(rtm_nh_policy_new) - 1, - rtm_nh_policy_new, extack); - if (err < 0) - return err; - err = -EINVAL; if (nhm->resvd || nhm->nh_scope) { NL_SET_ERR_MSG(extack, "Invalid values in ancillary header"); @@ -3093,7 +3092,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, NL_SET_ERR_MSG(extack, "Invalid group type"); goto out; } - err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), + + err = nh_check_attr_group(net, tb, ARRAY_SIZE(rtm_nh_policy_new), cfg->nh_grp_type, extack); if (err) goto out; @@ -3126,25 +3126,6 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, goto out; } - if (!cfg->nh_fdb && tb[NHA_OIF]) { - cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); - if (cfg->nh_ifindex) - cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); - - if (!cfg->dev) { - NL_SET_ERR_MSG(extack, "Invalid device index"); - goto out; - } else if (!(cfg->dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } else if (!netif_carrier_ok(cfg->dev)) { - NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); - err = -ENETDOWN; - goto out; - } - } - err = -EINVAL; if (tb[NHA_GATEWAY]) { struct nlattr *gwa = tb[NHA_GATEWAY]; @@ -3187,7 +3168,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, } cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]); - err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack); + err = lwtunnel_valid_encap_type(cfg->nh_encap_type, + extack, false); if (err < 0) goto out; @@ -3206,22 +3188,76 @@ out: return err; } +static int rtm_to_nh_config_rtnl(struct net *net, struct nlattr 
**tb, + struct nh_config *cfg, + struct netlink_ext_ack *extack) +{ + if (tb[NHA_GROUP]) + return nh_check_attr_group_rtnl(net, tb, extack); + + if (tb[NHA_OIF]) { + cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); + if (cfg->nh_ifindex) + cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); + + if (!cfg->dev) { + NL_SET_ERR_MSG(extack, "Invalid device index"); + return -EINVAL; + } + + if (!(cfg->dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + return -ENETDOWN; + } + + if (!netif_carrier_ok(cfg->dev)) { + NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); + return -ENETDOWN; + } + } + + return 0; +} + /* rtnl */ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)]; struct net *net = sock_net(skb->sk); struct nh_config cfg; struct nexthop *nh; int err; - err = rtm_to_nh_config(net, skb, nlh, &cfg, extack); - if (!err) { - nh = nexthop_add(net, &cfg, extack); - if (IS_ERR(nh)) - err = PTR_ERR(nh); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_new) - 1, + rtm_nh_policy_new, extack); + if (err < 0) + goto out; + + err = rtm_to_nh_config(net, skb, nlh, tb, &cfg, extack); + if (err) + goto out; + + if (cfg.nlflags & NLM_F_REPLACE && !cfg.nh_id) { + NL_SET_ERR_MSG(extack, "Replace requires nexthop id"); + err = -EINVAL; + goto out; } + rtnl_net_lock(net); + + err = rtm_to_nh_config_rtnl(net, tb, &cfg, extack); + if (err) + goto unlock; + + nh = nexthop_add(net, &cfg, extack); + if (IS_ERR(nh)) + err = PTR_ERR(nh); + +unlock: + rtnl_net_unlock(net); +out: return err; } @@ -3278,13 +3314,17 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; + rtnl_net_lock(net); + nh = nexthop_find_by_id(net, id); - if (!nh) - return -ENOENT; + if (nh) + remove_nexthop(net, nh, &nlinfo); + else + err = -ENOENT; - remove_nexthop(net, nh, &nlinfo); + rtnl_net_unlock(net); - return 
0; + return err; } /* rtnl */ @@ -4036,18 +4076,20 @@ static struct pernet_operations nexthop_net_ops = { }; static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = { - {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop}, - {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop}, + {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop, + .flags = RTNL_FLAG_DOIT_PERNET}, + {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop, + .flags = RTNL_FLAG_DOIT_PERNET}, {.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop, .dumpit = rtm_dump_nexthop}, {.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket, .dumpit = rtm_dump_nexthop_bucket}, {.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP, - .doit = rtm_new_nexthop}, + .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET}, {.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP, .dumpit = rtm_dump_nexthop}, {.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP, - .doit = rtm_new_nexthop}, + .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET}, {.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP, .dumpit = rtm_dump_nexthop}, }; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 619ddc087957..c14baa6589c7 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -705,7 +705,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ip_options_data opt_copy; int free = 0; __be32 saddr, daddr, faddr; - u8 tos, scope; + u8 scope; int err; pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); @@ -768,7 +768,6 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } faddr = ipc.opt->opt.faddr; } - tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); if (ipv4_is_multicast(daddr)) { @@ -779,7 +778,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } else if (!ipc.oif) ipc.oif = READ_ONCE(inet->uc_index); - flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, + flowi4_init_output(&fl4, ipc.oif, 
ipc.sockc.mark, + ipc.tos & INET_DSCP_MASK, scope, sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, sk->sk_uid); @@ -966,10 +966,9 @@ EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); enum skb_drop_reason ping_rcv(struct sk_buff *skb) { - enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET; - struct sock *sk; struct net *net = dev_net(skb->dev); struct icmphdr *icmph = icmp_hdr(skb); + struct sock *sk; /* We assume the packet has already been checked by icmp_rcv */ @@ -980,20 +979,11 @@ enum skb_drop_reason ping_rcv(struct sk_buff *skb) skb_push(skb, skb->data - (u8 *)icmph); sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); - if (sk) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - - pr_debug("rcv on socket %p\n", sk); - if (skb2) - reason = __ping_queue_rcv_skb(sk, skb2); - else - reason = SKB_DROP_REASON_NOMEM; - } - - if (reason) - pr_debug("no socket, dropping\n"); + if (sk) + return __ping_queue_rcv_skb(sk, skb); - return reason; + kfree_skb_reason(skb, SKB_DROP_REASON_NO_SOCKET); + return SKB_DROP_REASON_NO_SOCKET; } EXPORT_SYMBOL_GPL(ping_rcv); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index affd21a0f572..10cbeb76c274 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), + SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED), SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK), SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS), SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED), diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4304a68d1db0..6aace4d55733 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -486,7 +486,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ipcm_cookie ipc; struct rtable *rt = NULL; struct flowi4 fl4; - u8 tos, scope; + u8 
scope; int free = 0; __be32 daddr; __be32 saddr; @@ -581,7 +581,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) daddr = ipc.opt->opt.faddr; } } - tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); uc_index = READ_ONCE(inet->uc_index); @@ -606,7 +605,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } } - flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, + flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, + ipc.tos & INET_DSCP_MASK, scope, hdrincl ? ipc.protocol : sk->sk_protocol, inet_sk_flowi_flags(sk) | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 1948d15f1f28..5459a78b9809 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -222,7 +222,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, return NULL; } -EXPORT_SYMBOL(tcp_get_cookie_sock); +EXPORT_IPV6_MOD(tcp_get_cookie_sock); /* * when syncookies are in effect and tcp timestamps are enabled we stored @@ -259,7 +259,7 @@ bool cookie_timestamp_decode(const struct net *net, return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } -EXPORT_SYMBOL(cookie_timestamp_decode); +EXPORT_IPV6_MOD(cookie_timestamp_decode); static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, struct request_sock *req) @@ -279,6 +279,7 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, ireq->smc_ok = 0; treq->snt_synack = 0; + treq->snt_tsval_first = 0; treq->tfo_listener = false; treq->txhash = net_tx_rndhash(); treq->rcv_isn = ntohl(th->seq) - 1; @@ -310,7 +311,7 @@ struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb) return req; } -EXPORT_SYMBOL_GPL(cookie_bpf_check); +EXPORT_IPV6_MOD_GPL(cookie_bpf_check); #endif struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, @@ -351,7 +352,7 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, 
return req; } -EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); +EXPORT_IPV6_MOD_GPL(cookie_tcp_reqsk_alloc); static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 42cb5dc9cb24..3a43010d726f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,6 +28,7 @@ static int tcp_adv_win_scale_max = 31; static int tcp_app_win_max = 31; static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; static int tcp_min_snd_mss_max = 65535; +static int tcp_rto_max_max = TCP_RTO_MAX_SEC * MSEC_PER_SEC; static int ip_privileged_port_min; static int ip_privileged_port_max = 65535; static int ip_ttl_min = 1; @@ -1583,6 +1584,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, }, + { + .procname = "tcp_rto_max_ms", + .data = &init_net.ipv4.sysctl_tcp_rto_max_ms, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE_THOUSAND, + .extra2 = &tcp_rto_max_max, + }, }; static __net_init int ipv4_sysctl_init_net(struct net *net) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 57df7c1d2faa..ea8de00f669d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -300,10 +300,10 @@ DEFINE_PER_CPU(u32, tcp_tw_isn); EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); long sysctl_tcp_mem[3] __read_mostly; -EXPORT_SYMBOL(sysctl_tcp_mem); +EXPORT_IPV6_MOD(sysctl_tcp_mem); atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */ -EXPORT_SYMBOL(tcp_memory_allocated); +EXPORT_IPV6_MOD(tcp_memory_allocated); DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc); @@ -316,7 +316,7 @@ EXPORT_SYMBOL(tcp_have_smc); * Current number of TCP sockets. 
*/ struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp; -EXPORT_SYMBOL(tcp_sockets_allocated); +EXPORT_IPV6_MOD(tcp_sockets_allocated); /* * TCP splice context @@ -349,7 +349,7 @@ void tcp_enter_memory_pressure(struct sock *sk) if (!cmpxchg(&tcp_memory_pressure, 0, val)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); } -EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure); +EXPORT_IPV6_MOD_GPL(tcp_enter_memory_pressure); void tcp_leave_memory_pressure(struct sock *sk) { @@ -362,7 +362,7 @@ void tcp_leave_memory_pressure(struct sock *sk) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO, jiffies_to_msecs(jiffies - val)); } -EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure); +EXPORT_IPV6_MOD_GPL(tcp_leave_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ static u8 secs_to_retrans(int seconds, int timeout, int rto_max) @@ -423,7 +423,7 @@ void tcp_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int rto_min_us; + int rto_min_us, rto_max_ms; tp->out_of_order_queue = RB_ROOT; sk->tcp_rtx_queue = RB_ROOT; @@ -432,6 +432,10 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = TCP_TIMEOUT_INIT; + + rto_max_ms = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_ms); + icsk->icsk_rto_max = msecs_to_jiffies(rto_max_ms); + rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us); icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us); icsk->icsk_delack_max = TCP_DELACK_MAX; @@ -475,7 +479,7 @@ void tcp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1); } -EXPORT_SYMBOL(tcp_init_sock); +EXPORT_IPV6_MOD(tcp_init_sock); static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc) { @@ -488,10 +492,14 @@ static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc) sock_tx_timestamp(sk, sockc, &shinfo->tx_flags); if 
(tsflags & SOF_TIMESTAMPING_TX_ACK) - tcb->txstamp_ack = 1; + tcb->txstamp_ack |= TSTAMP_ACK_SK; if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; } + + if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) && + SK_BPF_CB_FLAG_TEST(sk, SK_BPF_CB_TX_TIMESTAMPING) && skb) + bpf_skops_tx_timestamping(sk, skb, BPF_SOCK_OPS_TSTAMP_SENDMSG_CB); } static bool tcp_stream_is_readable(struct sock *sk, int target) @@ -660,7 +668,7 @@ int tcp_ioctl(struct sock *sk, int cmd, int *karg) *karg = answ; return 0; } -EXPORT_SYMBOL(tcp_ioctl); +EXPORT_IPV6_MOD(tcp_ioctl); void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { @@ -876,7 +884,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, return ret; } -EXPORT_SYMBOL(tcp_splice_read); +EXPORT_IPV6_MOD(tcp_splice_read); struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, bool force_schedule) @@ -1123,7 +1131,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) /* 'common' sending to sendq */ } - sockcm_init(&sockc, sk); + sockc = (struct sockcm_cookie) { .tsflags = READ_ONCE(sk->sk_tsflags)}; if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) { @@ -1376,7 +1384,7 @@ void tcp_splice_eof(struct socket *sock) tcp_push(sk, 0, mss_now, tp->nonagle, size_goal); release_sock(sk); } -EXPORT_SYMBOL_GPL(tcp_splice_eof); +EXPORT_IPV6_MOD_GPL(tcp_splice_eof); /* * Handle reading urgent data. 
BSD has very simple semantics for @@ -1517,11 +1525,25 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) __tcp_cleanup_rbuf(sk, copied); } +/* private version of sock_rfree() avoiding one atomic_sub() */ +void tcp_sock_rfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + unsigned int len = skb->truesize; + + sock_owned_by_me(sk); + atomic_set(&sk->sk_rmem_alloc, + atomic_read(&sk->sk_rmem_alloc) - len); + + sk_forward_alloc_add(sk, len); + sk_mem_reclaim(sk); +} + static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); - if (likely(skb->destructor == sock_rfree)) { - sock_rfree(skb); + if (likely(skb->destructor == tcp_sock_rfree)) { + tcp_sock_rfree(skb); skb->destructor = NULL; skb->sk = NULL; return skb_attempt_defer_free(skb); @@ -1686,7 +1708,7 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) } return copied; } -EXPORT_SYMBOL(tcp_read_skb); +EXPORT_IPV6_MOD(tcp_read_skb); void tcp_read_done(struct sock *sk, size_t len) { @@ -1731,7 +1753,7 @@ int tcp_peek_len(struct socket *sock) { return tcp_inq(sock->sk); } -EXPORT_SYMBOL(tcp_peek_len); +EXPORT_IPV6_MOD(tcp_peek_len); /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */ int tcp_set_rcvlowat(struct sock *sk, int val) @@ -1758,7 +1780,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } return 0; } -EXPORT_SYMBOL(tcp_set_rcvlowat); +EXPORT_IPV6_MOD(tcp_set_rcvlowat); void tcp_update_recv_tstamps(struct sk_buff *skb, struct scm_timestamping_internal *tss) @@ -1791,7 +1813,7 @@ int tcp_mmap(struct file *file, struct socket *sock, vma->vm_ops = &tcp_vm_ops; return 0; } -EXPORT_SYMBOL(tcp_mmap); +EXPORT_IPV6_MOD(tcp_mmap); static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, u32 *offset_frag) @@ -2493,6 +2515,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, } niov = skb_frag_net_iov(frag); + if (!net_is_devmem_iov(niov)) { + err = -ENODEV; + goto out; + } + end = 
start + skb_frag_size(frag); copy = end - offset; @@ -2511,7 +2538,7 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, /* Will perform the exchange later */ dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx]; - dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov); + dmabuf_cmsg.dmabuf_id = net_devmem_iov_binding_id(niov); offset += copy; remaining_len -= copy; @@ -2877,7 +2904,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, } return ret; } -EXPORT_SYMBOL(tcp_recvmsg); +EXPORT_IPV6_MOD(tcp_recvmsg); void tcp_set_state(struct sock *sk, int state) { @@ -3007,7 +3034,7 @@ void tcp_shutdown(struct sock *sk, int how) tcp_send_fin(sk); } } -EXPORT_SYMBOL(tcp_shutdown); +EXPORT_IPV6_MOD(tcp_shutdown); int tcp_orphan_count_sum(void) { @@ -3187,7 +3214,7 @@ adjudge_to_death: const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, + tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -3339,8 +3366,8 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_probes_out = 0; icsk->icsk_probes_tstamp = 0; icsk->icsk_rto = TCP_TIMEOUT_INIT; - icsk->icsk_rto_min = TCP_RTO_MIN; - icsk->icsk_delack_max = TCP_DELACK_MAX; + WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN); + WRITE_ONCE(icsk->icsk_delack_max, TCP_DELACK_MAX); tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_snd_cwnd_set(tp, TCP_INIT_CWND); tp->snd_cwnd_cnt = 0; @@ -3506,7 +3533,7 @@ static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf, } DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); -EXPORT_SYMBOL(tcp_tx_delay_enabled); +EXPORT_IPV6_MOD(tcp_tx_delay_enabled); static void tcp_enable_tx_delay(void) { @@ -3640,7 +3667,7 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val) elapsed = tp->keepalive_time - elapsed; else elapsed = 0; - inet_csk_reset_keepalive_timer(sk, elapsed); + tcp_reset_keepalive_timer(sk, elapsed); } return 0; @@ -3680,32 
+3707,32 @@ EXPORT_SYMBOL(tcp_sock_set_keepcnt); int tcp_set_window_clamp(struct sock *sk, int val) { + u32 old_window_clamp, new_window_clamp, new_rcv_ssthresh; struct tcp_sock *tp = tcp_sk(sk); if (!val) { if (sk->sk_state != TCP_CLOSE) return -EINVAL; WRITE_ONCE(tp->window_clamp, 0); - } else { - u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp; - u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ? - SOCK_MIN_RCVBUF / 2 : val; + return 0; + } - if (new_window_clamp == old_window_clamp) - return 0; + old_window_clamp = tp->window_clamp; + new_window_clamp = max_t(int, SOCK_MIN_RCVBUF / 2, val); - WRITE_ONCE(tp->window_clamp, new_window_clamp); - if (new_window_clamp < old_window_clamp) { - /* need to apply the reserved mem provisioning only - * when shrinking the window clamp - */ - __tcp_adjust_rcv_ssthresh(sk, tp->window_clamp); + if (new_window_clamp == old_window_clamp) + return 0; - } else { - new_rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); - tp->rcv_ssthresh = max(new_rcv_ssthresh, - tp->rcv_ssthresh); - } + WRITE_ONCE(tp->window_clamp, new_window_clamp); + + /* Need to apply the reserved mem provisioning only + * when shrinking the window clamp. 
+ */ + if (new_window_clamp < old_window_clamp) { + __tcp_adjust_rcv_ssthresh(sk, new_window_clamp); + } else { + new_rcv_ssthresh = min(tp->rcv_wnd, new_window_clamp); + tp->rcv_ssthresh = max(new_rcv_ssthresh, tp->rcv_ssthresh); } return 0; } @@ -3815,6 +3842,27 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ)); return 0; + case TCP_RTO_MAX_MS: + if (val < MSEC_PER_SEC || val > TCP_RTO_MAX_SEC * MSEC_PER_SEC) + return -EINVAL; + WRITE_ONCE(inet_csk(sk)->icsk_rto_max, msecs_to_jiffies(val)); + return 0; + case TCP_RTO_MIN_US: { + int rto_min = usecs_to_jiffies(val); + + if (rto_min > TCP_RTO_MIN || rto_min < TCP_TIMEOUT_MIN) + return -EINVAL; + WRITE_ONCE(inet_csk(sk)->icsk_rto_min, rto_min); + return 0; + } + case TCP_DELACK_MAX_US: { + int delack_max = usecs_to_jiffies(val); + + if (delack_max > TCP_DELACK_MAX || delack_max < TCP_TIMEOUT_MIN) + return -EINVAL; + WRITE_ONCE(inet_csk(sk)->icsk_delack_max, delack_max); + return 0; + } } sockopt_lock_sock(sk); @@ -4044,7 +4092,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } -EXPORT_SYMBOL(tcp_setsockopt); +EXPORT_IPV6_MOD(tcp_setsockopt); static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, struct tcp_info *info) @@ -4120,7 +4168,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; } - if (tp->ecn_flags & TCP_ECN_OK) + if (tcp_ecn_mode_any(tp)) info->tcpi_options |= TCPI_OPT_ECN; if (tp->ecn_flags & TCP_ECN_SEEN) info->tcpi_options |= TCPI_OPT_ECN_SEEN; @@ -4651,6 +4699,15 @@ zerocopy_rcv_out: case TCP_IS_MPTCP: val = 0; break; + case TCP_RTO_MAX_MS: + val = jiffies_to_msecs(tcp_rto_max(sk)); + break; + case TCP_RTO_MIN_US: + val = jiffies_to_usecs(READ_ONCE(inet_csk(sk)->icsk_rto_min)); + break; + case TCP_DELACK_MAX_US: + val = 
jiffies_to_usecs(READ_ONCE(inet_csk(sk)->icsk_delack_max)); + break; default: return -ENOPROTOOPT; } @@ -4672,7 +4729,7 @@ bool tcp_bpf_bypass_getsockopt(int level, int optname) return false; } -EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt); +EXPORT_IPV6_MOD(tcp_bpf_bypass_getsockopt); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) @@ -4686,11 +4743,11 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); } -EXPORT_SYMBOL(tcp_getsockopt); +EXPORT_IPV6_MOD(tcp_getsockopt); #ifdef CONFIG_TCP_MD5SIG int tcp_md5_sigpool_id = -1; -EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id); +EXPORT_IPV6_MOD_GPL(tcp_md5_sigpool_id); int tcp_md5_alloc_sigpool(void) { @@ -4736,7 +4793,7 @@ int tcp_md5_hash_key(struct tcp_sigpool *hp, */ return data_race(crypto_ahash_update(hp->req)); } -EXPORT_SYMBOL(tcp_md5_hash_key); +EXPORT_IPV6_MOD(tcp_md5_hash_key); /* Called with rcu_read_lock() */ static enum skb_drop_reason @@ -4856,7 +4913,7 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, l3index, md5_location); } -EXPORT_SYMBOL_GPL(tcp_inbound_hash); +EXPORT_IPV6_MOD_GPL(tcp_inbound_hash); void tcp_done(struct sock *sk) { @@ -5005,7 +5062,12 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh); +#if IS_ENABLED(CONFIG_TLS_DEVICE) + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tcp_clean_acked); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 77); +#else CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69); +#endif /* TX read-write hotpath cache lines */ 
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out); diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 8a45a4aea933..03abe0848420 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -90,7 +90,7 @@ __bpf_kfunc static void dctcp_init(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - if ((tp->ecn_flags & TCP_ECN_OK) || + if (tcp_ecn_mode_any(tp) || (sk->sk_state == TCP_LISTEN || sk->sk_state == TCP_CLOSE)) { struct dctcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_dctcp.h b/net/ipv4/tcp_dctcp.h index d69a77cbd0c7..4b0259111d81 100644 --- a/net/ipv4/tcp_dctcp.h +++ b/net/ipv4/tcp_dctcp.h @@ -28,7 +28,7 @@ static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, */ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { dctcp_ece_ack_cwr(sk, *ce_state); - __tcp_send_ack(sk, *prior_rcv_nxt); + __tcp_send_ack(sk, *prior_rcv_nxt, 0); } inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f428ecf9120f..45e174b8cd22 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -83,7 +83,7 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb, #endif static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk, - const struct tcp_ulp_ops *ulp_ops) + const struct tcp_ulp_ops *ulp_ops, bool net_admin) { struct nlattr *nest; int err; @@ -97,7 +97,7 @@ static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk, goto nla_failure; if (ulp_ops->get_info) - err = ulp_ops->get_info(sk, skb); + err = ulp_ops->get_info(sk, skb, net_admin); if (err) goto nla_failure; @@ -113,6 +113,7 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_ulp_ops *ulp_ops; int err = 0; #ifdef CONFIG_TCP_MD5SIG @@ -129,15 +130,13 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin, } #endif - if (net_admin) { - const struct tcp_ulp_ops 
*ulp_ops; - - ulp_ops = icsk->icsk_ulp_ops; - if (ulp_ops) - err = tcp_diag_put_ulp(skb, sk, ulp_ops); - if (err) + ulp_ops = icsk->icsk_ulp_ops; + if (ulp_ops) { + err = tcp_diag_put_ulp(skb, sk, ulp_ops, net_admin); + if (err < 0) return err; } + return 0; } @@ -164,7 +163,7 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) } #endif - if (net_admin && sk_fullsock(sk)) { + if (sk_fullsock(sk)) { const struct tcp_ulp_ops *ulp_ops; ulp_ops = icsk->icsk_ulp_ops; @@ -172,7 +171,7 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) size += nla_total_size(0) + nla_total_size(TCP_ULP_NAME_MAX); if (ulp_ops->get_info_size) - size += ulp_ops->get_info_size(sk); + size += ulp_ops->get_info_size(sk, net_admin); } } return size; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 32b28fc21b63..ca40665145c6 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); sk_forced_mem_schedule(sk, skb->truesize); - skb_set_owner_r(skb, sk); + tcp_skb_set_owner_r(skb, sk); TCP_SKB_CB(skb)->seq++; TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; @@ -274,8 +274,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, * because it's been added to the accept queue directly. 
*/ req->timeout = tcp_timeout_init(child); - inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, - req->timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(child, ICSK_TIME_RETRANS, + req->timeout, false); refcount_set(&req->rsk_refcnt, 2); @@ -468,7 +468,7 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err) } return false; } -EXPORT_SYMBOL(tcp_fastopen_defer_connect); +EXPORT_IPV6_MOD(tcp_fastopen_defer_connect); /* * The following code block is to deal with middle box issues with TFO: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0cbf81bf3d45..e1f952fbac48 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -102,6 +102,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */ #define FLAG_DSACK_TLP 0x20000 /* DSACK for tail loss probe */ +#define FLAG_TS_PROGRESS 0x40000 /* Positive timestamp delta */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -118,18 +119,18 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #if IS_ENABLED(CONFIG_TLS_DEVICE) static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ); -void clean_acked_data_enable(struct inet_connection_sock *icsk, +void clean_acked_data_enable(struct tcp_sock *tp, void (*cad)(struct sock *sk, u32 ack_seq)) { - icsk->icsk_clean_acked = cad; + tp->tcp_clean_acked = cad; static_branch_deferred_inc(&clean_acked_data_enabled); } EXPORT_SYMBOL_GPL(clean_acked_data_enable); -void clean_acked_data_disable(struct inet_connection_sock *icsk) +void clean_acked_data_disable(struct tcp_sock *tp) { static_branch_slow_dec_deferred(&clean_acked_data_enabled); - icsk->icsk_clean_acked = NULL; + tp->tcp_clean_acked = NULL; } EXPORT_SYMBOL_GPL(clean_acked_data_disable); @@ -169,6 +170,7 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb) 
memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB; sock_ops.is_fullsock = 1; + sock_ops.is_locked_tcp_sock = 1; sock_ops.sk = sk; bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb)); @@ -185,6 +187,7 @@ static void bpf_skops_established(struct sock *sk, int bpf_op, memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); sock_ops.op = bpf_op; sock_ops.is_fullsock = 1; + sock_ops.is_locked_tcp_sock = 1; sock_ops.sk = sk; /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */ if (skb) @@ -331,15 +334,14 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) static bool tcp_in_quickack_mode(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); - const struct dst_entry *dst = __sk_dst_get(sk); - return (dst && dst_metric(dst, RTAX_QUICKACK)) || + return icsk->icsk_ack.dst_quick_ack || (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk)); } static void tcp_ecn_queue_cwr(struct tcp_sock *tp) { - if (tp->ecn_flags & TCP_ECN_OK) + if (tcp_ecn_mode_rfc3168(tp)) tp->ecn_flags |= TCP_ECN_QUEUE_CWR; } @@ -362,10 +364,13 @@ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } -static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) +static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + if (tcp_ecn_disabled(tp)) + return; + switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { case INET_ECN_NOT_ECT: /* Funny extension: if ECT is not set on a segment, @@ -394,31 +399,39 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) } } -static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) -{ - if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) - __tcp_ecn_check_ce(sk, skb); -} - static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) { - if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || 
th->cwr)) - tp->ecn_flags &= ~TCP_ECN_OK; + if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); } static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) { - if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) - tp->ecn_flags &= ~TCP_ECN_OK; + if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); } static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) { - if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) + if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp)) return true; return false; } +static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count) +{ + tp->delivered_ce += ecn_count; +} + +/* Updates the delivered and delivered_ce counts */ +static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, + bool ece_ack) +{ + tp->delivered += delivered; + if (ece_ack) + tcp_count_delivered_ce(tp, delivered); +} + /* Buffer size and advertised window tuning. * * 1. Tuning sk->sk_sndbuf, when connection enters established state. @@ -636,7 +649,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) inet_csk(sk)->icsk_ack.rcv_mss = hint; } -EXPORT_SYMBOL(tcp_initialize_rcv_mss); +EXPORT_IPV6_MOD(tcp_initialize_rcv_mss); /* Receiver "autotuning" code. * @@ -857,7 +870,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) icsk->icsk_ack.lrcvtime = now; tcp_save_lrcv_flowlabel(sk, skb); - tcp_ecn_check_ce(sk, skb); + tcp_data_ecn_check(sk, skb); if (skb->len >= 128) tcp_grow_window(sk, skb, true); @@ -1154,15 +1167,6 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) } } -/* Updates the delivered and delivered_ce counts */ -static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, - bool ece_ack) -{ - tp->delivered += delivered; - if (ece_ack) - tp->delivered_ce += delivered; -} - /* This procedure tags the retransmission queue when SACKs arrive. 
* * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). @@ -2258,8 +2262,7 @@ static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag) unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), msecs_to_jiffies(10)); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - delay, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, false); *ack_flag &= ~FLAG_SET_XMIT_TIMER; return true; } @@ -2716,6 +2719,8 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd)) return; + trace_tcp_cwnd_reduction_tp(sk, newly_acked_sacked, newly_lost, flag); + tp->prr_delivered += newly_acked_sacked; if (delta < 0) { u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + @@ -2898,7 +2903,7 @@ void tcp_simple_retransmit(struct sock *sk) */ tcp_non_congestion_loss_retransmit(sk); } -EXPORT_SYMBOL(tcp_simple_retransmit); +EXPORT_IPV6_MOD(tcp_simple_retransmit); void tcp_enter_recovery(struct sock *sk, bool ece_ack) { @@ -3288,8 +3293,7 @@ void tcp_rearm_rto(struct sock *sk) */ rto = usecs_to_jiffies(max_t(int, delta_us, 1)); } - tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, - TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, true); } } @@ -3566,10 +3570,10 @@ static void tcp_ack_probe(struct sock *sk) * This function is not for random using! 
*/ } else { - unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); + unsigned long when = tcp_probe0_when(sk, tcp_rto_max(sk)); when = tcp_clamp_probe0_to_user_timeout(sk, when); - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, true); } } @@ -3814,8 +3818,16 @@ static void tcp_store_ts_recent(struct tcp_sock *tp) tp->rx_opt.ts_recent_stamp = ktime_get_seconds(); } -static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) +static int __tcp_replace_ts_recent(struct tcp_sock *tp, s32 tstamp_delta) +{ + tcp_store_ts_recent(tp); + return tstamp_delta > 0 ? FLAG_TS_PROGRESS : 0; +} + +static int tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) { + s32 delta; + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { /* PAWS bug workaround wrt. ACK frames, the PAWS discard * extra check below makes sure this can only happen @@ -3824,9 +3836,13 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) * Not only, also it occurs for expired timestamps. 
*/ - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); + if (tcp_paws_check(&tp->rx_opt, 0)) { + delta = tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent; + return __tcp_replace_ts_recent(tp, delta); + } } + + return 0; } /* This routine deals with acks during a TLP episode and ends an episode by @@ -3862,12 +3878,23 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) } } -static inline void tcp_in_ack_event(struct sock *sk, u32 flags) +static void tcp_in_ack_event(struct sock *sk, int flag) { const struct inet_connection_sock *icsk = inet_csk(sk); - if (icsk->icsk_ca_ops->in_ack_event) - icsk->icsk_ca_ops->in_ack_event(sk, flags); + if (icsk->icsk_ca_ops->in_ack_event) { + u32 ack_ev_flags = 0; + + if (flag & FLAG_WIN_UPDATE) + ack_ev_flags |= CA_ACK_WIN_UPDATE; + if (flag & FLAG_SLOWPATH) { + ack_ev_flags |= CA_ACK_SLOWPATH; + if (flag & FLAG_ECE) + ack_ev_flags |= CA_ACK_ECE; + } + + icsk->icsk_ca_ops->in_ack_event(sk, ack_ev_flags); + } } /* Congestion control has updated the cwnd already. So if we're in @@ -3960,8 +3987,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) #if IS_ENABLED(CONFIG_TLS_DEVICE) if (static_branch_unlikely(&clean_acked_data_enabled.key)) - if (icsk->icsk_clean_acked) - icsk->icsk_clean_acked(sk, ack); + if (tp->tcp_clean_acked) + tp->tcp_clean_acked(sk, ack); #endif } @@ -3972,7 +3999,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * is in window. 
*/ if (flag & FLAG_UPDATE_TS_RECENT) - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + flag |= tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) == FLAG_SND_UNA_ADVANCED) { @@ -3984,12 +4011,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_snd_una_update(tp, ack); flag |= FLAG_WIN_UPDATE; - tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); } else { - u32 ack_ev_flags = CA_ACK_SLOWPATH; - if (ack_seq != TCP_SKB_CB(skb)->end_seq) flag |= FLAG_DATA; else @@ -4001,19 +4024,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { + if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) flag |= FLAG_ECE; - ack_ev_flags |= CA_ACK_ECE; - } if (sack_state.sack_delivered) tcp_count_delivered(tp, sack_state.sack_delivered, flag & FLAG_ECE); - - if (flag & FLAG_WIN_UPDATE) - ack_ev_flags |= CA_ACK_WIN_UPDATE; - - tcp_in_ack_event(sk, ack_ev_flags); } /* This is a deviation from RFC3168 since it states that: @@ -4040,6 +4056,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_rack_update_reo_wnd(sk, &rs); + tcp_in_ack_event(sk, flag); + if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); @@ -4071,6 +4089,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) return 1; no_queue: + tcp_in_ack_event(sk, flag); /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) { tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag, @@ -4180,7 +4199,6 @@ u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) } return mss; } -EXPORT_SYMBOL_GPL(tcp_parse_mss_option); /* Look for tcp options. Normally only called on SYN and SYNACK packets. 
* But, this can also be called on packets in the established flow when @@ -4530,7 +4548,7 @@ void tcp_done_with_error(struct sock *sk, int err) if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } -EXPORT_SYMBOL(tcp_done_with_error); +EXPORT_IPV6_MOD(tcp_done_with_error); /* When we get a reset we do this. */ void tcp_reset(struct sock *sk, struct sk_buff *skb) @@ -5019,7 +5037,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) bool fragstolen; tcp_save_lrcv_flowlabel(sk, skb); - tcp_ecn_check_ce(sk, skb); + tcp_data_ecn_check(sk, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); @@ -5153,7 +5171,7 @@ end: if (tcp_is_sack(tp)) tcp_grow_window(sk, skb, false); skb_condense(skb); - skb_set_owner_r(skb, sk); + tcp_skb_set_owner_r(skb, sk); } } @@ -5169,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { tcp_add_receive_queue(sk, skb); - skb_set_owner_r(skb, sk); + tcp_skb_set_owner_r(skb, sk); } return eaten; } @@ -5486,7 +5504,7 @@ skip_this: __skb_queue_before(list, skb, nskb); else __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ - skb_set_owner_r(nskb, sk); + tcp_skb_set_owner_r(nskb, sk); mptcp_skb_ext_move(nskb, skb); /* Copy data, releasing collapsed skbs. 
*/ @@ -6156,6 +6174,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->seq == tp->rcv_nxt && !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { int tcp_header_len = tp->tcp_header_len; + s32 delta = 0; + int flag = 0; /* Timestamp header prediction: tcp_header_len * is automatically equal to th->doff*4 due to pred_flags @@ -6168,8 +6188,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) if (!tcp_parse_aligned_timestamp(tp, th)) goto slow_path; + delta = tp->rx_opt.rcv_tsval - + tp->rx_opt.ts_recent; /* If PAWS failed, check it more carefully in slow path */ - if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) + if (delta < 0) goto slow_path; /* DO NOT update ts_recent here, if checksum fails @@ -6189,12 +6211,13 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) if (tcp_header_len == (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); + flag |= __tcp_replace_ts_recent(tp, + delta); /* We know that such packets are checksummed * on entry. */ - tcp_ack(sk, skb, 0); + tcp_ack(sk, skb, flag); __kfree_skb(skb); tcp_data_snd_check(sk); /* When receiving pure ack in fast path, update @@ -6225,7 +6248,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) if (tcp_header_len == (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); + flag |= __tcp_replace_ts_recent(tp, + delta); tcp_rcv_rtt_measure_ts(sk, skb); @@ -6240,7 +6264,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { /* Well, only one small jumplet in fast path... 
*/ - tcp_ack(sk, skb, FLAG_DATA); + tcp_ack(sk, skb, flag | FLAG_DATA); tcp_data_snd_check(sk); if (!inet_csk_ack_scheduled(sk)) goto no_ack; @@ -6300,7 +6324,7 @@ csum_error: discard: tcp_drop_reason(sk, skb, reason); } -EXPORT_SYMBOL(tcp_rcv_established); +EXPORT_IPV6_MOD(tcp_rcv_established); void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb) { @@ -6353,7 +6377,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tp->lsndtime = tcp_jiffies32; if (sock_flag(sk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); + tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); if (!tp->rx_opt.snd_wscale) __tcp_fast_path_on(tp, tp->snd_wnd); @@ -6476,9 +6500,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { /* Previous FIN/ACK or RST/ACK might be ignored. */ if (icsk->icsk_retransmits == 0) - inet_csk_reset_xmit_timer(sk, - ICSK_TIME_RETRANS, - TCP_TIMEOUT_MIN, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + TCP_TIMEOUT_MIN, false); SKB_DR_SET(reason, TCP_INVALID_ACK_SEQUENCE); goto reset_and_undo; } @@ -6593,8 +6616,8 @@ consume: */ inet_csk_schedule_ack(sk); tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MAX, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MAX, false); goto consume; } tcp_send_ack(sk); @@ -6812,10 +6835,9 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - if (!tcp_check_req(sk, skb, req, true, &req_stolen)) { - SKB_DR_SET(reason, TCP_FASTOPEN); + SKB_DR_SET(reason, TCP_FASTOPEN); + if (!tcp_check_req(sk, skb, req, true, &req_stolen, &reason)) goto discard; - } } if (!th->ack && !th->rst && !th->syn) { @@ -6928,7 +6950,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tmo = tcp_fin_time(sk); if (tmo > 
TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else if (th->fin || sock_owned_by_user(sk)) { /* Bad case. We could lose such FIN otherwise. * It is not a big problem, but it looks confusing @@ -6936,7 +6958,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * if it spins in bh_lock_sock(), but it is really * marginal case. */ - inet_csk_reset_keepalive_timer(sk, tmo); + tcp_reset_keepalive_timer(sk, tmo); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto consume; @@ -7014,7 +7036,7 @@ consume: __kfree_skb(skb); return 0; } -EXPORT_SYMBOL(tcp_rcv_state_process); +EXPORT_IPV6_MOD(tcp_rcv_state_process); static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) { @@ -7081,6 +7103,7 @@ static void tcp_openreq_init(struct request_sock *req, tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; tcp_rsk(req)->snt_synack = 0; + tcp_rsk(req)->snt_tsval_first = 0; tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? 
rx_opt->rcv_tsval : 0; @@ -7196,7 +7219,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, return mss; } -EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss); +EXPORT_IPV6_MOD_GPL(tcp_get_syncookie_mss); int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, @@ -7377,4 +7400,4 @@ drop: tcp_listendrop(sk); return 0; } -EXPORT_SYMBOL(tcp_conn_request); +EXPORT_IPV6_MOD(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2632844d2c35..8cce0d5489da 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -66,6 +66,7 @@ #include <net/transp_v6.h> #include <net/ipv6.h> #include <net/inet_common.h> +#include <net/inet_ecn.h> #include <net/timewait_sock.h> #include <net/xfrm.h> #include <net/secure_seq.h> @@ -92,7 +93,6 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, #endif struct inet_hashinfo tcp_hashinfo; -EXPORT_SYMBOL(tcp_hashinfo); static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), @@ -199,7 +199,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } -EXPORT_SYMBOL_GPL(tcp_twsk_unique); +EXPORT_IPV6_MOD_GPL(tcp_twsk_unique); static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) @@ -359,7 +359,7 @@ failure: inet->inet_dport = 0; return err; } -EXPORT_SYMBOL(tcp_v4_connect); +EXPORT_IPV6_MOD(tcp_v4_connect); /* * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. 
@@ -400,7 +400,7 @@ void tcp_v4_mtu_reduced(struct sock *sk) tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ } -EXPORT_SYMBOL(tcp_v4_mtu_reduced); +EXPORT_IPV6_MOD(tcp_v4_mtu_reduced); static void do_redirect(struct sk_buff *skb, struct sock *sk) { @@ -434,7 +434,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) } reqsk_put(req); } -EXPORT_SYMBOL(tcp_req_err); +EXPORT_IPV6_MOD(tcp_req_err); /* TCP-LD (RFC 6069) logic */ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) @@ -458,15 +458,14 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) icsk->icsk_backoff--; icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT; - icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); + icsk->icsk_rto = inet_csk_rto_backoff(icsk, tcp_rto_max(sk)); tcp_mstamp_refresh(tp); delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb)); remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us); if (remaining > 0) { - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - remaining, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, false); } else { /* RTO revert clocked out retransmission. * Will retransmit now. 
@@ -474,7 +473,7 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) tcp_retransmit_timer(sk); } } -EXPORT_SYMBOL(tcp_ld_RTO_revert); +EXPORT_IPV6_MOD(tcp_ld_RTO_revert); /* * This routine is called by the ICMP module when it gets some @@ -496,14 +495,14 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); - struct tcp_sock *tp; + struct net *net = dev_net_rcu(skb->dev); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; - struct sock *sk; struct request_sock *fastopen; + struct tcp_sock *tp; u32 seq, snd_una; + struct sock *sk; int err; - struct net *net = dev_net(skb->dev); sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, iph->daddr, th->dest, iph->saddr, @@ -676,7 +675,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); } -EXPORT_SYMBOL(tcp_v4_send_check); +EXPORT_IPV6_MOD(tcp_v4_send_check); #define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32)) @@ -788,7 +787,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); - net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); + net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) @@ -889,7 +888,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != offsetof(struct inet_timewait_sock, tw_bound_dev_if)); - arg.tos = ip_hdr(skb)->tos; + /* ECN bits of TW reset are cleared */ + arg.tos = ip_hdr(skb)->tos & ~INET_ECN_MASK; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
sk : NULL); local_bh_disable(); local_lock_nested_bh(&ipv4_tcp_sk.bh_lock); @@ -1035,11 +1035,21 @@ static void tcp_v4_send_ack(const struct sock *sk, local_bh_enable(); } -static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) +static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb, + enum tcp_tw_status tw_status) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); struct tcp_key key = {}; + u8 tos = tw->tw_tos; + + /* Cleaning only ECN bits of TW ACKs of oow data or is paws_reject, + * while not cleaning ECN bits of other TW ACKs to avoid these ACKs + * being placed in a different service queues (Classic rather than L4S) + */ + if (tw_status == TCP_TW_ACK_OOW) + tos &= ~INET_ECN_MASK; + #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; @@ -1083,7 +1093,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, &key, tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, - tw->tw_tos, + tos, tw->tw_txhash); inet_twsk_put(tw); @@ -1153,14 +1163,15 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, key.type = TCP_KEY_MD5; } + /* Cleaning ECN bits of TW ACKs of oow data or is paws_reject */ tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), - READ_ONCE(req->ts_recent), + req->ts_recent, 0, &key, inet_rsk(req)->no_srccheck ? 
IP_REPLY_ARG_NOSRCCHECK : 0, - ip_hdr(skb)->tos, + ip_hdr(skb)->tos & ~INET_ECN_MASK, READ_ONCE(tcp_rsk(req)->txhash)); if (tcp_key_is_ao(&key)) kfree(key.traffic_key); @@ -1231,7 +1242,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) */ DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ); -EXPORT_SYMBOL(tcp_md5_needed); +EXPORT_IPV6_MOD(tcp_md5_needed); static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new) { @@ -1290,7 +1301,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, } return best_match; } -EXPORT_SYMBOL(__tcp_md5_do_lookup); +EXPORT_IPV6_MOD(__tcp_md5_do_lookup); static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, const union tcp_md5_addr *addr, @@ -1337,7 +1348,7 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; return tcp_md5_do_lookup(sk, l3index, addr, AF_INET); } -EXPORT_SYMBOL(tcp_v4_md5_lookup); +EXPORT_IPV6_MOD(tcp_v4_md5_lookup); static int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp) { @@ -1433,7 +1444,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags, newkey, newkeylen, GFP_KERNEL); } -EXPORT_SYMBOL(tcp_md5_do_add); +EXPORT_IPV6_MOD(tcp_md5_do_add); int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, @@ -1465,7 +1476,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, key->flags, key->key, key->keylen, sk_gfp_mask(sk, GFP_ATOMIC)); } -EXPORT_SYMBOL(tcp_md5_key_copy); +EXPORT_IPV6_MOD(tcp_md5_key_copy); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags) @@ -1480,7 +1491,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, kfree_rcu(key, rcu); return 0; } -EXPORT_SYMBOL(tcp_md5_do_del); 
+EXPORT_IPV6_MOD(tcp_md5_do_del); void tcp_clear_md5_list(struct sock *sk) { @@ -1659,7 +1670,7 @@ clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } -EXPORT_SYMBOL(tcp_v4_md5_hash_skb); +EXPORT_IPV6_MOD(tcp_v4_md5_hash_skb); #endif @@ -1732,7 +1743,7 @@ drop: tcp_listendrop(sk); return 0; } -EXPORT_SYMBOL(tcp_v4_conn_request); +EXPORT_IPV6_MOD(tcp_v4_conn_request); /* @@ -1770,10 +1781,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - sk_daddr_set(newsk, ireq->ir_rmt_addr); - sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); - newsk->sk_bound_dev_if = ireq->ir_iif; - newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = rcu_dereference(ireq->ireq_opt); RCU_INIT_POINTER(newinet->inet_opt, inet_opt); newinet->mc_index = inet_iif(skb); @@ -1856,7 +1863,7 @@ put_and_exit: tcp_done(newsk); goto exit; } -EXPORT_SYMBOL(tcp_v4_syn_recv_sock); +EXPORT_IPV6_MOD(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) { @@ -1967,7 +1974,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv); int tcp_v4_early_demux(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); const struct iphdr *iph; const struct tcphdr *th; struct sock *sk; @@ -2057,7 +2064,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, !((TCP_SKB_CB(tail)->tcp_flags & TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ - TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || + TCP_SKB_CB(skb)->tcp_flags) & + (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)) || !tcp_skb_can_collapse_rx(tail, skb) || thtail->doff != th->doff || memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th))) @@ -2135,7 +2143,7 @@ no_coalesce: } return false; } -EXPORT_SYMBOL(tcp_add_backlog); +EXPORT_IPV6_MOD(tcp_add_backlog); int tcp_filter(struct sock *sk, struct sk_buff *skb) { @@ -2143,7 +2151,7 @@ int tcp_filter(struct sock *sk, 
struct sk_buff *skb) return sk_filter_trim_cap(sk, skb, th->doff * 4); } -EXPORT_SYMBOL(tcp_filter); +EXPORT_IPV6_MOD(tcp_filter); static void tcp_v4_restore_cb(struct sk_buff *skb) { @@ -2165,7 +2173,7 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->has_rxtstamp = @@ -2178,8 +2186,9 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, int tcp_v4_rcv(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); enum skb_drop_reason drop_reason; + enum tcp_tw_status tw_status; int sdif = inet_sdif(skb); int dif = inet_iif(skb); const struct iphdr *iph; @@ -2271,7 +2280,8 @@ lookup: th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); - nsk = tcp_check_req(sk, skb, req, false, &req_stolen); + nsk = tcp_check_req(sk, skb, req, false, &req_stolen, + &drop_reason); } else { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } @@ -2406,7 +2416,9 @@ do_time_wait: inet_twsk_put(inet_twsk(sk)); goto csum_error; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) { + + tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn); + switch (tw_status) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, @@ -2427,7 +2439,8 @@ do_time_wait: /* to ACK */ fallthrough; case TCP_TW_ACK: - tcp_v4_timewait_ack(sk, skb); + case TCP_TW_ACK_OOW: + tcp_v4_timewait_ack(sk, skb, tw_status); break; case TCP_TW_RST: tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); @@ -2452,7 +2465,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff 
*skb) sk->sk_rx_dst_ifindex = skb->skb_iif; } } -EXPORT_SYMBOL(inet_sk_rx_dst_set); +EXPORT_IPV6_MOD(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, @@ -2464,11 +2477,9 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .addr2sockaddr = inet_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in), .mtu_reduced = tcp_v4_mtu_reduced, }; -EXPORT_SYMBOL(ipv4_specific); +EXPORT_IPV6_MOD(ipv4_specific); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { @@ -2578,7 +2589,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk_sockets_allocated_dec(sk); } -EXPORT_SYMBOL(tcp_v4_destroy_sock); +EXPORT_IPV6_MOD(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ @@ -2814,7 +2825,7 @@ out: st->last_pos = *pos; return rc; } -EXPORT_SYMBOL(tcp_seq_start); +EXPORT_IPV6_MOD(tcp_seq_start); void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -2845,7 +2856,7 @@ out: st->last_pos = *pos; return rc; } -EXPORT_SYMBOL(tcp_seq_next); +EXPORT_IPV6_MOD(tcp_seq_next); void tcp_seq_stop(struct seq_file *seq, void *v) { @@ -2863,7 +2874,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v) break; } } -EXPORT_SYMBOL(tcp_seq_stop); +EXPORT_IPV6_MOD(tcp_seq_stop); static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i) @@ -2912,10 +2923,10 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk_timeout(icsk); } else if (icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk_timeout(icsk); } else if (timer_pending(&sk->sk_timer)) { timer_active = 
2; timer_expires = sk->sk_timer.expires; @@ -3533,6 +3544,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_pingpong_thresh = 1; net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN); + net->ipv4.sysctl_tcp_rto_max_ms = TCP_RTO_MAX_SEC * MSEC_PER_SEC; return 0; } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 95669935494e..4251670e328c 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -170,7 +170,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, bool reclaim = false; spin_lock_bh(&tcp_metrics_lock); - net = dev_net(dst->dev); + net = dev_net_rcu(dst->dev); /* While waiting for the spin-lock the cache might have been populated * with this entry and so we have to check again. @@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return NULL; } - net = dev_net(dst->dev); + net = dev_net_rcu(dst->dev); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); @@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, else return NULL; - net = dev_net(dst->dev); + net = dev_net_rcu(dst->dev); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dfdb7a4608a8..fb9349be36b8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -44,7 +44,7 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, /* Send ACK. Note, we do not put the bucket, * it will be released by caller. */ - return TCP_TW_ACK; + return TCP_TW_ACK_OOW; } /* We are rate-limiting, so just release the tw sock and drop skb. 
*/ @@ -264,7 +264,7 @@ kill: inet_twsk_put(tw); return TCP_TW_SUCCESS; } -EXPORT_SYMBOL(tcp_timewait_state_process); +EXPORT_IPV6_MOD(tcp_timewait_state_process); static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) { @@ -398,7 +398,7 @@ void tcp_twsk_destructor(struct sock *sk) #endif tcp_ao_destroy_sock(sk, true); } -EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +EXPORT_IPV6_MOD_GPL(tcp_twsk_destructor); void tcp_twsk_purge(struct list_head *net_exit_list) { @@ -457,12 +457,13 @@ void tcp_openreq_init_rwin(struct request_sock *req, rcv_wnd); ireq->rcv_wscale = rcv_wscale; } -EXPORT_SYMBOL(tcp_openreq_init_rwin); static void tcp_ecn_openreq_child(struct tcp_sock *tp, const struct request_sock *req) { - tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; + tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ? + TCP_ECN_MODE_RFC3168 : + TCP_ECN_DISABLED); } void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) @@ -492,7 +493,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) tcp_set_ca_state(sk, TCP_CA_Open); } -EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); +EXPORT_IPV6_MOD_GPL(tcp_ca_openreq_child); static void smc_check_reset_syn_req(const struct tcp_sock *oldtp, struct request_sock *req, @@ -566,8 +567,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); if (sock_flag(newsk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(newsk, - keepalive_time_when(newtp)); + tcp_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; newtp->rx_opt.sack_ok = ireq->sack_ok; @@ -587,7 +587,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, if (newtp->rx_opt.tstamp_ok) { newtp->tcp_usec_ts = treq->req_usec_ts; - newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent); + newtp->rx_opt.ts_recent = req->ts_recent; newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); newtp->tcp_header_len = 
sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; } else { @@ -659,12 +659,14 @@ EXPORT_SYMBOL(tcp_create_openreq_child); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - bool fastopen, bool *req_stolen) + bool fastopen, bool *req_stolen, + enum skb_drop_reason *drop_reason) { struct tcp_options_received tmp_opt; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); + bool tsecr_reject = false; bool paws_reject = false; bool own_req; @@ -673,9 +675,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { - tmp_opt.ts_recent = READ_ONCE(req->ts_recent); - if (tmp_opt.rcv_tsecr) + tmp_opt.ts_recent = req->ts_recent; + if (tmp_opt.rcv_tsecr) { + if (inet_rsk(req)->tstamp_ok && !fastopen) + tsecr_reject = !between(tmp_opt.rcv_tsecr, + tcp_rsk(req)->snt_tsval_first, + READ_ONCE(tcp_rsk(req)->snt_tsval_last)); tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off; + } /* We do not store true stamp, but it is not required, * it can be estimated (approximately) * from another data. @@ -790,26 +797,29 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->snt_isn + 1)) return sk; - /* Also, it would be not so bad idea to check rcv_tsecr, which - * is essentially ACK extension and too early or too late values - * should cause reset in unsynchronized states. - */ - /* RFC793: "first check sequence number". */ - if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(skb)->end_seq, - tcp_rsk(req)->rcv_nxt, - tcp_rsk(req)->rcv_nxt + - tcp_synack_window(req))) { + if (paws_reject || tsecr_reject || + !tcp_in_window(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq, + tcp_rsk(req)->rcv_nxt, + tcp_rsk(req)->rcv_nxt + + tcp_synack_window(req))) { /* Out of window: send ACK and drop. 
*/ if (!(flg & TCP_FLAG_RST) && !tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, &tcp_rsk(req)->last_oow_ack_time)) req->rsk_ops->send_ack(sk, skb, req); - if (paws_reject) + if (paws_reject) { + SKB_DR_SET(*drop_reason, TCP_RFC7323_PAWS); NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + } else if (tsecr_reject) { + SKB_DR_SET(*drop_reason, TCP_RFC7323_TSECR); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TSECRREJECTED); + } else { + SKB_DR_SET(*drop_reason, TCP_OVERWINDOW); + } return NULL; } @@ -879,6 +889,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: + SKB_DR_SET(*drop_reason, TCP_LISTEN_OVERFLOW); if (sk != req->rsk_listener) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); @@ -908,7 +919,7 @@ embryonic_reset: } return NULL; } -EXPORT_SYMBOL(tcp_check_req); +EXPORT_IPV6_MOD(tcp_check_req); /* * Queue segment on the new socket if the new socket is active, @@ -950,4 +961,4 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, sock_put(child); return reason; } -EXPORT_SYMBOL(tcp_child_process); +EXPORT_IPV6_MOD(tcp_child_process); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 2dfac79dc78b..934f777f29d3 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -142,6 +142,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, struct sk_buff *gso_skb = skb; __sum16 newcheck; bool ooo_okay, copy_destructor; + bool ecn_cwr_mask; __wsum delta; th = tcp_hdr(skb); @@ -201,6 +202,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta)); + ecn_cwr_mask = !!(skb_shinfo(gso_skb)->gso_type & SKB_GSO_TCP_ACCECN); + while (skb->next) { th->fin = th->psh = 0; th->check = newcheck; @@ -220,7 +223,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th = tcp_hdr(skb); th->seq = htonl(seq); - th->cwr = 0; + + 
th->cwr &= ecn_cwr_mask; } /* Following permits TCP Small Queues to work well with GSO : @@ -328,7 +332,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, th2 = tcp_hdr(p); flush = (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + ~(TCP_FLAG_FIN | TCP_FLAG_PSH)); flush |= (__force int)(th->ack_seq ^ th2->ack_seq); for (i = sizeof(*th); i < thlen; i += 4) flush |= *(u32 *)((u8 *)th + i) ^ @@ -404,7 +408,7 @@ void tcp_gro_complete(struct sk_buff *skb) shinfo->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) - shinfo->gso_type |= SKB_GSO_TCP_ECN; + shinfo->gso_type |= SKB_GSO_TCP_ACCECN; } EXPORT_SYMBOL(tcp_gro_complete); @@ -428,7 +432,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, inet_get_iif_sdif(skb, &iif, &sdif); iph = skb_gro_network_header(skb); - net = dev_net(skb->dev); + net = dev_net_rcu(skb->dev); sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bc95d2a5924f..13295a59d22e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -250,7 +250,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, WRITE_ONCE(*__window_clamp, min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp)); } -EXPORT_SYMBOL(tcp_select_initial_window); +EXPORT_IPV6_MOD(tcp_select_initial_window); /* Chose a new window to advertise, update state in tcp_sock for the * socket, and return result with RFC1323 scaling applied. 
The return @@ -325,7 +325,7 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) const struct tcp_sock *tp = tcp_sk(sk); TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; - if (!(tp->ecn_flags & TCP_ECN_OK)) + if (tcp_ecn_disabled(tp)) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; else if (tcp_ca_needs_ecn(sk) || tcp_bpf_ca_needs_ecn(sk)) @@ -351,7 +351,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) if (use_ecn) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; - tp->ecn_flags = TCP_ECN_OK; + tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) INET_ECN_xmit(sk); } @@ -381,7 +381,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - if (tp->ecn_flags & TCP_ECN_OK) { + if (tcp_ecn_mode_rfc3168(tp)) { /* Not-retransmitted data segment: set ECT and inject CWR. */ if (skb->len != tcp_header_len && !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { @@ -403,7 +403,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, /* Constructs common control bits of non-data skb. If SYN/FIN is present, * auto increment end seqno. 
*/ -static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) +static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u16 flags) { skb->ip_summed = CHECKSUM_PARTIAL; @@ -525,6 +525,7 @@ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb, sock_owned_by_me(sk); sock_ops.is_fullsock = 1; + sock_ops.is_locked_tcp_sock = 1; sock_ops.sk = sk; } @@ -570,6 +571,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, sock_owned_by_me(sk); sock_ops.is_fullsock = 1; + sock_ops.is_locked_tcp_sock = 1; sock_ops.sk = sk; } @@ -941,7 +943,13 @@ static unsigned int tcp_synack_options(const struct sock *sk, opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) + tcp_rsk(req)->ts_off; - opts->tsecr = READ_ONCE(req->ts_recent); + if (!tcp_rsk(req)->snt_tsval_first) { + if (!opts->tsval) + opts->tsval = ~0U; + tcp_rsk(req)->snt_tsval_first = opts->tsval; + } + WRITE_ONCE(tcp_rsk(req)->snt_tsval_last, opts->tsval); + opts->tsecr = req->ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } if (likely(ireq->sack_ok)) { @@ -1171,7 +1179,7 @@ void tcp_release_cb(struct sock *sk) if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) tcp_send_ack(sk); } -EXPORT_SYMBOL(tcp_release_cb); +EXPORT_IPV6_MOD(tcp_release_cb); void __init tcp_tasklet_init(void) { @@ -1387,7 +1395,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, th->seq = htonl(tcb->seq); th->ack_seq = htonl(rcv_nxt); *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | - tcb->tcp_flags); + (tcb->tcp_flags & TCPHDR_FLAGS_MASK)); th->check = 0; th->urg_ptr = 0; @@ -1608,8 +1616,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, struct sk_buff *buff; int old_factor; long limit; + u16 flags; int nlen; - u8 flags; if (WARN_ON(len > skb->len)) return -EINVAL; @@ -1783,7 +1791,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) return __tcp_mtu_to_mss(sk, pmtu) - 
(tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); } -EXPORT_SYMBOL(tcp_mtu_to_mss); +EXPORT_IPV6_MOD(tcp_mtu_to_mss); /* Inverse of above */ int tcp_mss_to_mtu(struct sock *sk, int mss) @@ -1813,7 +1821,6 @@ void tcp_mtup_init(struct sock *sk) if (icsk->icsk_mtup.enabled) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } -EXPORT_SYMBOL(tcp_mtup_init); /* This function synchronize snd mss to current pmtu/exthdr set. @@ -1857,7 +1864,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } -EXPORT_SYMBOL(tcp_sync_mss); +EXPORT_IPV6_MOD(tcp_sync_mss); /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. @@ -2164,7 +2171,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, { int nlen = skb->len - len; struct sk_buff *buff; - u8 flags; + u16 flags; /* All of a TSO frame must be composed of paged data. */ DEBUG_NET_WARN_ON_ONCE(skb->len != skb->data_len); @@ -2911,7 +2918,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rto_delta_us > 0) timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); - tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, true); return true; } @@ -3545,8 +3552,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) } if (rearm_timer) tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, - TCP_RTO_MAX); + inet_csk(sk)->icsk_rto, true); } /* We allow to exceed memory limits for FIN packets to expedite @@ -3853,7 +3859,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, return skb; } -EXPORT_SYMBOL(tcp_make_synack); +EXPORT_IPV6_MOD(tcp_make_synack); static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) { @@ -4163,8 +4169,8 @@ int tcp_connect(struct sock *sk) TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. 
*/ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, false); return 0; } EXPORT_SYMBOL(tcp_connect); @@ -4173,7 +4179,7 @@ u32 tcp_delack_max(const struct sock *sk) { u32 delack_from_rto_min = max(tcp_rto_min(sk), 2) - 1; - return min(inet_csk(sk)->icsk_delack_max, delack_from_rto_min); + return min(READ_ONCE(inet_csk(sk)->icsk_delack_max), delack_from_rto_min); } /* Send out a delayed ack, the caller does the policy checking @@ -4219,22 +4225,21 @@ void tcp_send_delayed_ack(struct sock *sk) /* Use new timeout only if there wasn't a older one earlier. */ if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { /* If delack timer is about to expire, send ACK now. */ - if (time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) { + if (time_before_eq(icsk_delack_timeout(icsk), jiffies + (ato >> 2))) { tcp_send_ack(sk); return; } - if (!time_before(timeout, icsk->icsk_ack.timeout)) - timeout = icsk->icsk_ack.timeout; + if (!time_before(timeout, icsk_delack_timeout(icsk))) + timeout = icsk_delack_timeout(icsk); } smp_store_release(&icsk->icsk_ack.pending, icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER); - icsk->icsk_ack.timeout = timeout; sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } /* This routine sends an ack and also updates the window. 
*/ -void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) +void __tcp_send_ack(struct sock *sk, u32 rcv_nxt, u16 flags) { struct sk_buff *buff; @@ -4253,17 +4258,17 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) unsigned long delay; delay = TCP_DELACK_MAX << icsk->icsk_ack.retry; - if (delay < TCP_RTO_MAX) + if (delay < tcp_rto_max(sk)) icsk->icsk_ack.retry++; inet_csk_schedule_ack(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, false); return; } /* Reserve space for headers and prepare control bits. */ skb_reserve(buff, MAX_TCP_HEADER); - tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); + tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK | flags); /* We do not want pure acks influencing TCP Small Queues or fq/pacing * too much. @@ -4278,7 +4283,7 @@ EXPORT_SYMBOL_GPL(__tcp_send_ack); void tcp_send_ack(struct sock *sk) { - __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); + __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt, 0); } /* This routine sends a packet with an out of date sequence @@ -4393,7 +4398,7 @@ void tcp_send_probe0(struct sock *sk) if (err <= 0) { if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; - timeout = tcp_probe0_when(sk, TCP_RTO_MAX); + timeout = tcp_probe0_when(sk, tcp_rto_max(sk)); } else { /* If packet was not sent due to local congestion, * Let senders fight for local resources conservatively. 
@@ -4402,7 +4407,7 @@ void tcp_send_probe0(struct sock *sk) } timeout = tcp_clamp_probe0_to_user_timeout(sk, timeout); - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, true); } int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) @@ -4430,4 +4435,4 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) } return res; } -EXPORT_SYMBOL(tcp_rtx_synack); +EXPORT_IPV6_MOD(tcp_rtx_synack); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b412ed88ccd9..e4c616bbd727 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -109,7 +109,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ - if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) + if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*tcp_rto_max(sk) || !do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. 
*/ @@ -189,12 +189,12 @@ static unsigned int tcp_model_timeout(struct sock *sk, { unsigned int linear_backoff_thresh, timeout; - linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base); + linear_backoff_thresh = ilog2(tcp_rto_max(sk) / rto_base); if (boundary <= linear_backoff_thresh) timeout = ((2 << boundary) - 1) * rto_base; else timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + (boundary - linear_backoff_thresh) * tcp_rto_max(sk); return jiffies_to_msecs(timeout); } /** @@ -268,7 +268,7 @@ static int tcp_write_timeout(struct sock *sk) retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { - const bool alive = icsk->icsk_rto < TCP_RTO_MAX; + const bool alive = icsk->icsk_rto < tcp_rto_max(sk); retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || @@ -322,8 +322,9 @@ void tcp_delack_timer_handler(struct sock *sk) if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) return; - if (time_after(icsk->icsk_ack.timeout, jiffies)) { - sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + if (time_after(icsk_delack_timeout(icsk), jiffies)) { + sk_reset_timer(sk, &icsk->icsk_delack_timer, + icsk_delack_timeout(icsk)); return; } icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; @@ -416,7 +417,8 @@ static void tcp_probe_timer(struct sock *sk) } max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { - const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; + unsigned int rto_max = tcp_rto_max(sk); + const bool alive = inet_csk_rto_backoff(icsk, rto_max) < rto_max; max_probes = tcp_orphan_retries(sk, alive); if (!alive && icsk->icsk_backoff >= max_probes) @@ -481,8 +483,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) tcp_update_rto_stats(sk); if (!tp->retrans_stamp) tp->retrans_stamp = tcp_time_stamp_ts(tp); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - req->timeout 
<< req->num_timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + req->timeout << req->num_timeout, false); } static bool tcp_rtx_probe0_timed_out(const struct sock *sk, @@ -492,7 +494,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); const struct tcp_sock *tp = tcp_sk(sk); - int timeout = TCP_RTO_MAX * 2; + int timeout = tcp_rto_max(sk) * 2; s32 rcv_delta; if (user_timeout) { @@ -508,7 +510,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, * and tp->rcv_tstamp might very well have been written recently. * rcv_delta can thus be negative. */ - rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp; + rcv_delta = icsk_timeout(icsk) - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; @@ -626,9 +628,9 @@ void tcp_retransmit_timer(struct sock *sk) /* Retransmission failed because of local congestion, * Let senders fight for local resources conservatively. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - TCP_RESOURCE_PROBE_INTERVAL, - TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + TCP_RESOURCE_PROBE_INTERVAL, + false); goto out; } @@ -665,7 +667,7 @@ out_reset_timer: icsk->icsk_backoff = 0; icsk->icsk_rto = clamp(__tcp_set_rto(tp), tcp_rto_min(sk), - TCP_RTO_MAX); + tcp_rto_max(sk)); } else if (sk->sk_state != TCP_SYN_SENT || tp->total_rto > READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { @@ -673,10 +675,10 @@ out_reset_timer: * activated. 
*/ icsk->icsk_backoff++; - icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + icsk->icsk_rto = min(icsk->icsk_rto << 1, tcp_rto_max(sk)); } - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + tcp_clamp_rto_to_user_timeout(sk), false); if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); @@ -684,7 +686,8 @@ out:; } /* Called with bottom-half processing disabled. - Called by tcp_write_timer() */ + * Called by tcp_write_timer() and tcp_release_cb(). + */ void tcp_write_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -694,11 +697,11 @@ void tcp_write_timer_handler(struct sock *sk) !icsk->icsk_pending) return; - if (time_after(icsk->icsk_timeout, jiffies)) { - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + if (time_after(icsk_timeout(icsk), jiffies)) { + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, + icsk_timeout(icsk)); return; } - tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; @@ -749,7 +752,17 @@ void tcp_syn_ack_timeout(const struct request_sock *req) __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS); } -EXPORT_SYMBOL(tcp_syn_ack_timeout); +EXPORT_IPV6_MOD(tcp_syn_ack_timeout); + +void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len) +{ + sk_reset_timer(sk, &sk->sk_timer, jiffies + len); +} + +static void tcp_delete_keepalive_timer(struct sock *sk) +{ + sk_stop_timer(sk, &sk->sk_timer); +} void tcp_set_keepalive(struct sock *sk, int val) { @@ -757,14 +770,13 @@ void tcp_set_keepalive(struct sock *sk, int val) return; if (val && !sock_flag(sk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); + tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); else if (!val) - inet_csk_delete_keepalive_timer(sk); + tcp_delete_keepalive_timer(sk); } -EXPORT_SYMBOL_GPL(tcp_set_keepalive); - 
+EXPORT_IPV6_MOD_GPL(tcp_set_keepalive); -static void tcp_keepalive_timer (struct timer_list *t) +static void tcp_keepalive_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); struct inet_connection_sock *icsk = inet_csk(sk); @@ -775,7 +787,7 @@ static void tcp_keepalive_timer (struct timer_list *t) bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ - inet_csk_reset_keepalive_timer (sk, HZ/20); + tcp_reset_keepalive_timer(sk, HZ/20); goto out; } @@ -841,7 +853,7 @@ static void tcp_keepalive_timer (struct timer_list *t) } resched: - inet_csk_reset_keepalive_timer (sk, elapsed); + tcp_reset_keepalive_timer(sk, elapsed); goto out; death: @@ -884,11 +896,9 @@ void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); - hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_PINNED_SOFT); - tcp_sk(sk)->pacing_timer.function = tcp_pace_kick; + hrtimer_setup(&tcp_sk(sk)->pacing_timer, tcp_pace_kick, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED_SOFT); - hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_PINNED_SOFT); - tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick; + hrtimer_setup(&tcp_sk(sk)->compressed_ack_timer, tcp_compressed_ack_kick, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED_SOFT); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a9bb9ce5438e..d0bffcfa56d8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -121,13 +121,12 @@ #endif struct udp_table udp_table __read_mostly; -EXPORT_SYMBOL(udp_table); long sysctl_udp_mem[3] __read_mostly; -EXPORT_SYMBOL(sysctl_udp_mem); +EXPORT_IPV6_MOD(sysctl_udp_mem); atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp; -EXPORT_SYMBOL(udp_memory_allocated); +EXPORT_IPV6_MOD(udp_memory_allocated); DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); @@ -352,7 +351,7 @@ 
fail_unlock: fail: return error; } -EXPORT_SYMBOL(udp_lib_get_port); +EXPORT_IPV6_MOD(udp_lib_get_port); int udp_v4_get_port(struct sock *sk, unsigned short snum) { @@ -418,7 +417,7 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, return __inet_ehashfn(laddr, lport, faddr, fport, udp_ehash_secret + net_hash_mix(net)); } -EXPORT_SYMBOL(udp_ehashfn); +EXPORT_IPV6_MOD(udp_ehashfn); /** * udp4_lib_lookup1() - Simplified lookup using primary hash (destination port) @@ -653,7 +652,7 @@ void udp_lib_hash4(struct sock *sk, u16 hash) spin_unlock_bh(&hslot->lock); } -EXPORT_SYMBOL(udp_lib_hash4); +EXPORT_IPV6_MOD(udp_lib_hash4); /* call with sock lock */ void udp4_hash4(struct sock *sk) @@ -669,7 +668,7 @@ void udp4_hash4(struct sock *sk) udp_lib_hash4(sk, hash); } -EXPORT_SYMBOL(udp4_hash4); +EXPORT_IPV6_MOD(udp4_hash4); #endif /* CONFIG_BASE_SMALL */ /* UDP is nearly always wildcards out the wazoo, it makes no sense to try @@ -809,11 +808,11 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); -EXPORT_SYMBOL(udp_encap_needed_key); +EXPORT_IPV6_MOD(udp_encap_needed_key); #if IS_ENABLED(CONFIG_IPV6) DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); -EXPORT_SYMBOL(udpv6_encap_needed_key); +EXPORT_IPV6_MOD(udpv6_encap_needed_key); #endif void udp_encap_enable(void) @@ -1041,7 +1040,7 @@ void udp_flush_pending_frames(struct sock *sk) ip_flush_pending_frames(sk); } } -EXPORT_SYMBOL(udp_flush_pending_frames); +EXPORT_IPV6_MOD(udp_flush_pending_frames); /** * udp4_hwcsum - handle outgoing HW checksumming @@ -1229,7 +1228,7 @@ out: WRITE_ONCE(up->pending, 0); return err; } -EXPORT_SYMBOL(udp_push_pending_frames); +EXPORT_IPV6_MOD(udp_push_pending_frames); static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size) { @@ -1266,7 +1265,7 @@ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size) return need_ip; } -EXPORT_SYMBOL_GPL(udp_cmsg_send); 
+EXPORT_IPV6_MOD_GPL(udp_cmsg_send); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { @@ -1281,7 +1280,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int free = 0; int connected = 0; __be32 daddr, faddr, saddr; - u8 tos, scope; + u8 scope; __be16 dport; int err, is_udplite = IS_UDPLITE(sk); int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; @@ -1405,7 +1404,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) faddr = ipc.opt->opt.faddr; connected = 0; } - tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); if (scope == RT_SCOPE_LINK) connected = 0; @@ -1442,7 +1440,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &fl4_stack; - flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, scope, + flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, + ipc.tos & INET_DSCP_MASK, scope, sk->sk_protocol, flow_flags, faddr, saddr, dport, inet->inet_sport, sk->sk_uid); @@ -1561,7 +1560,7 @@ void udp_splice_eof(struct socket *sock) udp_push_pending_frames(sk); release_sock(sk); } -EXPORT_SYMBOL_GPL(udp_splice_eof); +EXPORT_IPV6_MOD_GPL(udp_splice_eof); #define UDP_SKB_IS_STATELESS 0x80000000 @@ -1678,7 +1677,7 @@ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) prefetch(&skb->data); udp_rmem_release(sk, udp_skb_truesize(skb), 1, false); } -EXPORT_SYMBOL(udp_skb_destructor); +EXPORT_IPV6_MOD(udp_skb_destructor); /* as above, but the caller held the rx queue lock, too */ static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb) @@ -1785,7 +1784,7 @@ drop: busylock_release(busy); return err; } -EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); +EXPORT_IPV6_MOD_GPL(__udp_enqueue_schedule_skb); void udp_destruct_common(struct sock *sk) { @@ -1801,7 +1800,7 @@ void udp_destruct_common(struct sock *sk) } udp_rmem_release(sk, total, 0, true); } -EXPORT_SYMBOL_GPL(udp_destruct_common); +EXPORT_IPV6_MOD_GPL(udp_destruct_common); static void 
udp_destruct_sock(struct sock *sk) { @@ -1832,7 +1831,7 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) skb_release_head_state(skb); __consume_stateless_skb(skb); } -EXPORT_SYMBOL_GPL(skb_consume_udp); +EXPORT_IPV6_MOD_GPL(skb_consume_udp); static struct sk_buff *__first_packet_length(struct sock *sk, struct sk_buff_head *rcvq, @@ -1849,7 +1848,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk, atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); *total += skb->truesize; - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); } else { udp_skb_csum_unnecessary_set(skb); break; @@ -1914,7 +1913,7 @@ int udp_ioctl(struct sock *sk, int cmd, int *karg) return 0; } -EXPORT_SYMBOL(udp_ioctl); +EXPORT_IPV6_MOD(udp_ioctl); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, int *err) @@ -2003,14 +2002,14 @@ try_again: __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, is_udplite); __UDP_INC_STATS(net, UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); goto try_again; } WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); return recv_actor(sk, skb); } -EXPORT_SYMBOL(udp_read_skb); +EXPORT_IPV6_MOD(udp_read_skb); /* * This should be easy, if there is something there we @@ -2118,7 +2117,7 @@ csum_copy_err: UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); /* starting over for a new packet, but check if we need to yield */ cond_resched(); @@ -2137,7 +2136,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } -EXPORT_SYMBOL(udp_pre_connect); +EXPORT_IPV6_MOD(udp_pre_connect); static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -2186,7 +2185,7 @@ int udp_disconnect(struct sock *sk, 
int flags) release_sock(sk); return 0; } -EXPORT_SYMBOL(udp_disconnect); +EXPORT_IPV6_MOD(udp_disconnect); void udp_lib_unhash(struct sock *sk) { @@ -2216,7 +2215,7 @@ void udp_lib_unhash(struct sock *sk) spin_unlock_bh(&hslot->lock); } } -EXPORT_SYMBOL(udp_lib_unhash); +EXPORT_IPV6_MOD(udp_lib_unhash); /* * inet_rcv_saddr was changed, we must rehash secondary hash @@ -2280,7 +2279,7 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) } } } -EXPORT_SYMBOL(udp_lib_rehash); +EXPORT_IPV6_MOD(udp_lib_rehash); void udp_v4_rehash(struct sock *sk) { @@ -2485,7 +2484,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) } return false; } -EXPORT_SYMBOL(udp_sk_rx_dst_set); +EXPORT_IPV6_MOD(udp_sk_rx_dst_set); /* * Multicasts and broadcasts go to each listener. @@ -3041,7 +3040,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return err; } -EXPORT_SYMBOL(udp_lib_setsockopt); +EXPORT_IPV6_MOD(udp_lib_setsockopt); int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) @@ -3112,7 +3111,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } -EXPORT_SYMBOL(udp_lib_getsockopt); +EXPORT_IPV6_MOD(udp_lib_getsockopt); int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) @@ -3154,7 +3153,7 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) return mask; } -EXPORT_SYMBOL(udp_poll); +EXPORT_IPV6_MOD(udp_poll); int udp_abort(struct sock *sk, int err) { @@ -3177,7 +3176,7 @@ out: return 0; } -EXPORT_SYMBOL_GPL(udp_abort); +EXPORT_IPV6_MOD_GPL(udp_abort); struct proto udp_prot = { .name = "UDP", @@ -3311,7 +3310,7 @@ void *udp_seq_start(struct seq_file *seq, loff_t *pos) return *pos ? 
udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } -EXPORT_SYMBOL(udp_seq_start); +EXPORT_IPV6_MOD(udp_seq_start); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -3325,7 +3324,7 @@ void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } -EXPORT_SYMBOL(udp_seq_next); +EXPORT_IPV6_MOD(udp_seq_next); void udp_seq_stop(struct seq_file *seq, void *v) { @@ -3337,7 +3336,7 @@ void udp_seq_stop(struct seq_file *seq, void *v) if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); } -EXPORT_SYMBOL(udp_seq_stop); +EXPORT_IPV6_MOD(udp_seq_stop); /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, struct seq_file *f, @@ -3616,7 +3615,7 @@ const struct seq_operations udp_seq_ops = { .stop = udp_seq_stop, .show = udp4_seq_show, }; -EXPORT_SYMBOL(udp_seq_ops); +EXPORT_IPV6_MOD(udp_seq_ops); static struct udp_seq_afinfo udp4_seq_afinfo = { .family = AF_INET, diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index ecfca59f31f1..2c0725583be3 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -634,7 +634,7 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport) { const struct iphdr *iph = skb_gro_network_header(skb); - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); int iif, sdif; inet_get_iif_sdif(skb, &iif, &sdif); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 6789623b2b0d..457de0745a33 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -1204,10 +1204,9 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) { struct ipv6_txoptions *opt2; - opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC); + opt2 = sock_kmemdup(sk, opt, opt->tot_len, GFP_ATOMIC); if (opt2) { long dif = (char *)opt2 - (char *)opt; - memcpy(opt2, opt, opt->tot_len); if (opt2->hopopt) *((char **)&opt2->hopopt) += dif; if (opt2->dst0opt) diff 
--git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 67d39114d9a6..fd5f7112a51f 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -29,6 +29,7 @@ struct fib6_rule { __be32 flowlabel; __be32 flowlabel_mask; dscp_t dscp; + dscp_t dscp_mask; u8 dscp_full:1; /* DSCP or TOS selector */ }; @@ -331,7 +332,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, return 0; } - if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel)) + if ((r->dscp ^ ip6_dscp(fl6->flowlabel)) & r->dscp_mask) return 0; if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask) @@ -340,12 +341,12 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) return 0; - if (fib_rule_port_range_set(&rule->sport_range) && - !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport)) + if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask, + fl6->fl6_sport)) return 0; - if (fib_rule_port_range_set(&rule->dport_range) && - !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport)) + if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask, + fl6->fl6_dport)) return 0; return 1; @@ -360,11 +361,35 @@ static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6, } rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); + rule6->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK); rule6->dscp_full = true; return 0; } +static int fib6_nl2rule_dscp_mask(const struct nlattr *nla, + struct fib6_rule *rule6, + struct netlink_ext_ack *extack) +{ + dscp_t dscp_mask; + + if (!rule6->dscp_full) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Cannot specify DSCP mask without DSCP value"); + return -EINVAL; + } + + dscp_mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); + if (rule6->dscp & ~dscp_mask) { + NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask"); + return -EINVAL; + } + + rule6->dscp_mask = dscp_mask; + + return 0; +} + static int fib6_nl2rule_flowlabel(struct nlattr 
**tb, struct fib6_rule *rule6, struct netlink_ext_ack *extack) { @@ -399,9 +424,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb, struct netlink_ext_ack *extack) { + struct fib6_rule *rule6 = (struct fib6_rule *)rule; + struct net *net = rule->fr_net; int err = -EINVAL; - struct net *net = sock_net(skb->sk); - struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (!inet_validate_dscp(frh->tos)) { NL_SET_ERR_MSG(extack, @@ -409,10 +434,15 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, goto errout; } rule6->dscp = inet_dsfield_to_dscp(frh->tos); + rule6->dscp_mask = frh->tos ? inet_dsfield_to_dscp(INET_DSCP_MASK) : 0; if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0) goto errout; + if (tb[FRA_DSCP_MASK] && + fib6_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule6, extack) < 0) + goto errout; + if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) && fib6_nl2rule_flowlabel(tb, rule6, extack) < 0) goto errout; @@ -482,6 +512,14 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, return 0; } + if (tb[FRA_DSCP_MASK]) { + dscp_t dscp_mask; + + dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2); + if (!rule6->dscp_full || rule6->dscp_mask != dscp_mask) + return 0; + } + if (tb[FRA_FLOWLABEL] && nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel) return 0; @@ -512,7 +550,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, if (rule6->dscp_full) { frh->tos = 0; if (nla_put_u8(skb, FRA_DSCP, - inet_dscp_to_dsfield(rule6->dscp) >> 2)) + inet_dscp_to_dsfield(rule6->dscp) >> 2) || + nla_put_u8(skb, FRA_DSCP_MASK, + inet_dscp_to_dsfield(rule6->dscp_mask) >> 2)) goto nla_put_failure; } else { frh->tos = inet_dscp_to_dsfield(rule6->dscp); @@ -539,6 +579,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) return nla_total_size(16) /* dst */ + nla_total_size(16) /* src */ + nla_total_size(1) /* dscp */ + + 
nla_total_size(1) /* dscp mask */ + nla_total_size(4) /* flowlabel */ + nla_total_size(4); /* flowlabel mask */ } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4d14ab7f7e99..3fd19a84b358 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -957,12 +957,9 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - reason = ping_rcv(skb); - break; - case ICMPV6_EXT_ECHO_REPLY: - reason = ping_rcv(skb); - break; + ping_rcv(skb); + return 0; case ICMPV6_PKT_TOOBIG: /* BUGGG_FUTURE: if packet contains rthdr, we cannot update diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 80043e46117c..dbcf556a35bb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -56,20 +56,6 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, } EXPORT_SYMBOL(inet6_csk_route_req); -void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) -{ - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; - - sin6->sin6_family = AF_INET6; - sin6->sin6_addr = sk->sk_v6_daddr; - sin6->sin6_port = inet_sk(sk)->inet_dport; - /* We do not store received flowlabel for TCP */ - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - sk->sk_bound_dev_if); -} -EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); - static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 9ec05e354baa..76ee521189eb 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -35,8 +35,8 @@ u32 inet6_ehashfn(const struct net *net, lhash = (__force u32)laddr->s6_addr32[3]; fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret); - return __inet6_ehashfn(lhash, lport, fhash, fport, - inet6_ehash_secret + net_hash_mix(net)); + return lport + __inet6_ehashfn(lhash, 0, fhash, fport, + inet6_ehash_secret + net_hash_mix(net)); } EXPORT_SYMBOL_GPL(inet6_ehashfn); @@ -263,7 
+263,9 @@ EXPORT_SYMBOL_GPL(inet6_lookup); static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) + struct inet_timewait_sock **twp, + bool rcu_lookup, + u32 hash) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); @@ -273,14 +275,26 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct net *net = sock_net(sk); const int sdif = l3mdev_master_ifindex_by_index(net, dif); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); - const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, - inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - spinlock_t *lock = inet_ehash_lockp(hinfo, hash); - struct sock *sk2; - const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; + const struct hlist_nulls_node *node; + struct sock *sk2; + spinlock_t *lock; + + if (rcu_lookup) { + sk_nulls_for_each(sk2, node, &head->chain) { + if (sk2->sk_hash != hash || + !inet6_match(net, sk2, saddr, daddr, + ports, dif, sdif)) + continue; + if (sk2->sk_state == TCP_TIME_WAIT) + break; + return -EADDRNOTAVAIL; + } + return 0; + } + lock = inet_ehash_lockp(hinfo, hash); spin_lock(lock); sk_nulls_for_each(sk2, node, &head->chain) { @@ -339,11 +353,19 @@ static u64 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { + const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *saddr = &sk->sk_v6_daddr; + const struct inet_sock *inet = inet_sk(sk); + const struct net *net = sock_net(sk); u64 port_offset = 0; + u32 hash_port0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); - return __inet_hash_connect(death_row, sk, port_offset, + + hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport); + + return __inet_hash_connect(death_row, sk, 
port_offset, hash_port0, __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 235808cfec70..957ca98fa70f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -43,6 +43,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <net/rtnetlink.h> #include <net/ipv6.h> @@ -1498,7 +1499,6 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) tunnel = netdev_priv(dev); tunnel->dev = dev; - tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); @@ -1621,7 +1621,7 @@ static int __net_init ip6gre_init_net(struct net *net) /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ - ign->fb_tunnel_dev->netns_local = true; + ign->fb_tunnel_dev->netns_immutable = true; ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; @@ -1882,7 +1882,6 @@ static int ip6erspan_tap_init(struct net_device *dev) tunnel = netdev_priv(dev); tunnel->dev = dev; - tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); @@ -1971,7 +1970,7 @@ static bool ip6gre_netlink_encap_parms(struct nlattr *data[], return ret; } -static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev, +static int ip6gre_newlink_common(struct net *link_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { @@ -1992,7 +1991,7 @@ static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev, eth_hw_addr_random(dev); nt->dev = dev; - nt->net = dev_net(dev); + nt->net = link_net; err = register_netdevice(dev); if (err) @@ -2005,12 +2004,14 @@ out: return err; } -static int ip6gre_newlink(struct net *src_net, struct net_device *dev, - struct nlattr 
*tb[], struct nlattr *data[], +static int ip6gre_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct net *net = params->link_net ? : dev_net(dev); struct ip6_tnl *nt = netdev_priv(dev); - struct net *net = dev_net(dev); + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; struct ip6gre_net *ign; int err; @@ -2025,7 +2026,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, return -EEXIST; } - err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + err = ip6gre_newlink_common(net, dev, tb, data, extack); if (!err) { ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); ip6gre_tunnel_link_md(ign, nt); @@ -2241,12 +2242,14 @@ static void ip6erspan_tap_setup(struct net_device *dev) netif_keep_dst(dev); } -static int ip6erspan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int ip6erspan_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { + struct net *net = params->link_net ? 
: dev_net(dev); struct ip6_tnl *nt = netdev_priv(dev); - struct net *net = dev_net(dev); + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; struct ip6gre_net *ign; int err; @@ -2262,7 +2265,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev, return -EEXIST; } - err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + err = ip6gre_newlink_common(net, dev, tb, data, extack); if (!err) { ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]); ip6erspan_tunnel_link_md(ign, nt); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d577bf2f3053..581bc6289081 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1386,6 +1386,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, } v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; + v6_cork->dontfrag = ipc6->dontfrag; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); @@ -1421,7 +1422,7 @@ static int __ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, - unsigned int flags, struct ipcm6_cookie *ipc6) + unsigned int flags) { struct sk_buff *skb, *skb_prev = NULL; struct inet_cork *cork = &cork_full->base; @@ -1475,7 +1476,7 @@ static int __ip6_append_data(struct sock *sk, if (headersize + transhdrlen > mtu) goto emsgsize; - if (cork->length + length > mtu - headersize && ipc6->dontfrag && + if (cork->length + length > mtu - headersize && v6_cork->dontfrag && (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_ICMPV6 || sk->sk_protocol == IPPROTO_RAW)) { @@ -1855,7 +1856,7 @@ int ip6_append_data(struct sock *sk, return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, &np->cork, sk_page_frag(sk), getfrag, - from, length, transhdrlen, flags, ipc6); + from, length, transhdrlen, flags); } 
EXPORT_SYMBOL_GPL(ip6_append_data); @@ -2054,13 +2055,11 @@ struct sk_buff *ip6_make_skb(struct sock *sk, ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); } - if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, &current->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, - flags, ipc6); + flags); if (err) { __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); return ERR_PTR(err); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 48fd53b98972..a04dd1bb4b19 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -52,6 +52,7 @@ #include <net/inet_ecn.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <net/dst_metadata.h> #include <net/inet_dscp.h> @@ -253,8 +254,7 @@ static void ip6_dev_free(struct net_device *dev) static int ip6_tnl_create2(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip6_tnl_net *ip6n = net_generic(t->net, ip6_tnl_net_id); int err; dev->rtnl_link_ops = &ip6_link_ops; @@ -1878,7 +1878,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev) int t_hlen; t->dev = dev; - t->net = dev_net(dev); ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); if (ret) @@ -1940,6 +1939,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + t->net = net; t->parms.proto = IPPROTO_IPV6; rcu_assign_pointer(ip6n->tnls_wc[0], t); @@ -2002,17 +2002,22 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } -static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int ip6_tnl_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct 
netlink_ext_ack *extack) { - struct net *net = dev_net(dev); - struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; struct ip_tunnel_encap ipencap; + struct ip6_tnl_net *ip6n; struct ip6_tnl *nt, *t; + struct net *net; int err; + net = params->link_net ? : dev_net(dev); + ip6n = net_generic(net, ip6_tnl_net_id); nt = netdev_priv(dev); + nt->net = net; if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip6_tnl_encap_setup(nt, &ipencap); @@ -2261,7 +2266,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ - ip6n->fb_tnl_dev->netns_local = true; + ip6n->fb_tnl_dev->netns_immutable = true; err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 590737c27537..09ec4b0ad7dc 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -45,6 +45,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <linux/etherdevice.h> #define IP6_VTI_HASH_SIZE_SHIFT 5 @@ -177,8 +178,7 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t) static int vti6_tnl_create2(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n = net_generic(t->net, vti6_net_id); int err; dev->rtnl_link_ops = &vti6_link_ops; @@ -925,7 +925,6 @@ static inline int vti6_dev_init_gen(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); t->dev = dev; - t->net = dev_net(dev); netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); return 0; @@ -958,6 +957,7 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, 
vti6_net_id); + t->net = net; t->parms.proto = IPPROTO_IPV6; rcu_assign_pointer(ip6n->tnls_wc[0], t); @@ -997,17 +997,20 @@ static void vti6_netlink_parms(struct nlattr *data[], parms->fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]); } -static int vti6_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int vti6_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { - struct net *net = dev_net(dev); + struct nlattr **data = params->data; struct ip6_tnl *nt; + struct net *net; + net = params->link_net ? : dev_net(dev); nt = netdev_priv(dev); vti6_netlink_parms(data, &nt->parms); nt->parms.proto = IPPROTO_IPV6; + nt->net = net; if (vti6_locate(net, &nt->parms, 0)) return -EEXIST; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 535e9f72514c..e8ade93a0f0e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -668,7 +668,7 @@ static void reg_vif_setup(struct net_device *dev) dev->flags = IFF_NOARP; dev->netdev_ops = &reg_vif_netdev_ops; dev->needs_free_netdev = true; - dev->netns_local = true; + dev->netns_immutable = true; } static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8699d1a188dc..ecb5c4b8518f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1680,7 +1680,7 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; - struct net *net = dev_net(dev); + struct net *net = dev_net_rcu(dev); struct sock *sk = net->ipv6.ndisc_sk; int optlen = 0; struct inet_peer *peer; @@ -1695,8 +1695,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; bool ret; - if (netif_is_l3_master(skb->dev)) { - dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif); + if 
(netif_is_l3_master(dev)) { + dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) return; } @@ -1734,10 +1734,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) goto release; } - rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr); ret = inet_peer_xrlim_allow(peer, 1*HZ); - rcu_read_unlock(); if (!ret) goto release; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 4120e67a8ce6..d6bd8f7079bb 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -123,7 +123,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) #endif static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev); + struct sk_buff *prev_tail, struct net_device *dev, + int *refs); static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { @@ -167,7 +168,8 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, - const struct frag_hdr *fhdr, int nhoff) + const struct frag_hdr *fhdr, int nhoff, + int *refs) { unsigned int payload_len; struct net_device *dev; @@ -221,7 +223,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, * this case. 
-DaveM */ pr_debug("end of fragment not rounded to 8 bytes.\n"); - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); return -EPROTO; } if (end > fq->q.len) { @@ -287,7 +289,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = nf_ct_frag6_reasm(fq, skb, prev, dev); + err = nf_ct_frag6_reasm(fq, skb, prev, dev, refs); skb->_skb_refdst = orefdst; /* After queue has assumed skb ownership, only 0 or @@ -301,7 +303,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, return -EINPROGRESS; insert_error: - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); err: skb_dst_drop(skb); return -EINVAL; @@ -315,13 +317,14 @@ err: * the last and the first frames arrived and all the bits are here. */ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev) + struct sk_buff *prev_tail, struct net_device *dev, + int *refs) { void *reasm_data; int payload_len; u8 ecn; - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) @@ -372,7 +375,7 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, return 0; err: - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); return -EINVAL; } @@ -447,6 +450,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; + int refs = 0; u8 prevhdr; /* Jumbo payload inhibits frag. header */ @@ -473,23 +477,26 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + rcu_read_lock(); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? 
skb->dev->ifindex : 0); if (fq == NULL) { + rcu_read_unlock(); pr_debug("Can't find and can't create new queue\n"); return -ENOMEM; } spin_lock_bh(&fq->q.lock); - ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); + ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff, &refs); if (ret == -EPROTO) { skb->transport_header = savethdr; ret = 0; } spin_unlock_bh(&fq->q.lock); - inet_frag_put(&fq->q); + rcu_read_unlock(); + inet_frag_putn(&fq->q, refs); return ret; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index a7690ec62325..9ea5ef56cb27 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -103,6 +103,10 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, struct sk_buff *data_skb = NULL; int doff = 0; int thoff = 0, tproto; +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn const *ct; +#endif tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) { @@ -136,6 +140,25 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, return NULL; } +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + /* Do the lookup with the original socket address in + * case this is a reply packet of an established + * SNAT-ted connection. + */ + ct = nf_ct_get(skb, &ctinfo); + if (ct && + ((tproto != IPPROTO_ICMPV6 && + ctinfo == IP_CT_ESTABLISHED_REPLY) || + (tproto == IPPROTO_ICMPV6 && + ctinfo == IP_CT_RELATED_REPLY)) && + (ct->status & IPS_SRC_NAT_DONE)) { + daddr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6; + dport = (tproto == IPPROTO_TCP) ? 
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + } +#endif + return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, sport, dport, indev); } diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index c9f1634b3838..7fd9d7b21cd4 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -170,6 +170,11 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, struct rt6_info *rt; int lookup_flags; + if (nft_fib_can_skip(pkt)) { + nft_fib_store_result(dest, priv, nft_in(pkt)); + return; + } + if (priv->flags & NFTA_FIB_F_IIF) oif = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) @@ -181,17 +186,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); - - if (nft_hook(pkt) == NF_INET_PRE_ROUTING || - nft_hook(pkt) == NF_INET_INGRESS) { - if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) || - nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { - nft_fib_store_result(dest, priv, nft_in(pkt)); - return; - } + if (nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { + nft_fib_store_result(dest, priv, nft_in(pkt)); + return; } + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); + *dest = 0; rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb, lookup_flags); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 46b8adf6e7f8..84d90dd8b3f0 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -119,9 +119,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; ipcm6_init_sk(&ipc6, sk); - ipc6.sockc.priority = READ_ONCE(sk->sk_priority); - ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); - ipc6.sockc.mark = READ_ONCE(sk->sk_mark); fl6.flowi6_oif = oif; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a45aba090aa4..fda640ebd53f 100644 --- a/net/ipv6/raw.c +++ 
b/net/ipv6/raw.c @@ -769,19 +769,16 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) hdrincl = inet_test_bit(HDRINCL, sk); + ipcm6_init_sk(&ipc6, sk); + /* * Get and verify the address. */ memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = READ_ONCE(sk->sk_mark); + fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; - ipcm6_init(&ipc6); - ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); - ipc6.sockc.mark = fl6.flowi6_mark; - ipc6.sockc.priority = READ_ONCE(sk->sk_priority); - if (sin6) { if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -891,9 +888,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); @@ -904,9 +898,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (ipc6.dontfrag < 0) - ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); - if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index a48be617a8ab..49740898bc13 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -68,7 +68,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) static struct inet_frags ip6_frags; static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev); + struct sk_buff *prev_tail, struct net_device *dev, + int *refs); static void ip6_frag_expire(struct timer_list *t) { @@ -105,7 +106,7 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff, - u32 *prob_offset) + u32 *prob_offset, int *refs) { struct net *net = dev_net(skb_dst(skb)->dev); int 
offset, end, fragsize; @@ -220,7 +221,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = ip6_frag_reasm(fq, skb, prev_tail, dev); + err = ip6_frag_reasm(fq, skb, prev_tail, dev, refs); skb->_skb_refdst = orefdst; return err; } @@ -238,7 +239,7 @@ insert_error: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASM_OVERLAPS); discard_fq: - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); err: @@ -254,7 +255,8 @@ err: * the last and the first frames arrived and all the bits are here. */ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, - struct sk_buff *prev_tail, struct net_device *dev) + struct sk_buff *prev_tail, struct net_device *dev, + int *refs) { struct net *net = fq->q.fqdir->net; unsigned int nhoff; @@ -262,7 +264,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, int payload_len; u8 ecn; - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) @@ -303,9 +305,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, skb_postpush_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); - rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS); - rcu_read_unlock(); fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; fq->q.last_run_head = NULL; @@ -317,10 +317,8 @@ out_oversize: out_oom: net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: - rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS); - rcu_read_unlock(); - inet_frag_kill(&fq->q); + inet_frag_kill(&fq->q, refs); return -1; } @@ -377,19 +375,21 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } iif = skb->dev ? 
skb->dev->ifindex : 0; + rcu_read_lock(); fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { u32 prob_offset = 0; - int ret; + int ret, refs = 0; spin_lock(&fq->q.lock); fq->iif = iif; ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, - &prob_offset); + &prob_offset, &refs); spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q); + rcu_read_unlock(); + inet_frag_putn(&fq->q, refs); if (prob_offset) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); @@ -398,6 +398,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } return ret; } + rcu_read_unlock(); __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 15ce21afc8c6..c3406a0d45bd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5131,7 +5131,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, - cfg->fc_mp_len, extack); + cfg->fc_mp_len, + extack, true); if (err < 0) goto errout; } @@ -5150,7 +5151,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); - err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type, + extack, true); if (err < 0) goto errout; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 39bd8951bfca..9a0f32acb750 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -51,6 +51,7 @@ #include <net/dsfield.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <net/inet_dscp.h> /* @@ -201,8 +202,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) static int ipip6_tunnel_create(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct sit_net *sitn = 
net_generic(net, sit_net_id); + struct sit_net *sitn = net_generic(t->net, sit_net_id); int err; __dev_addr_set(dev, &t->parms.iph.saddr, 4); @@ -269,6 +269,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, nt = netdev_priv(dev); + nt->net = net; nt->parms = *parms; if (ipip6_tunnel_create(dev) < 0) goto failed_free; @@ -1449,7 +1450,6 @@ static int ipip6_tunnel_init(struct net_device *dev) int err; tunnel->dev = dev; - tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); @@ -1550,19 +1550,23 @@ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], } #endif -static int ipip6_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int ipip6_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { - struct net *net = dev_net(dev); + struct nlattr **data = params->data; + struct nlattr **tb = params->tb; struct ip_tunnel *nt; struct ip_tunnel_encap ipencap; #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; #endif + struct net *net; int err; + net = params->link_net ? : dev_net(dev); nt = netdev_priv(dev); + nt->net = net; if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip_tunnel_encap_setup(nt, &ipencap); @@ -1856,7 +1860,10 @@ static int __net_init sit_init_net(struct net *net) /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. 
*/ - sitn->fb_tunnel_dev->netns_local = true; + sitn->fb_tunnel_dev->netns_immutable = true; + + t = netdev_priv(sitn->fb_tunnel_dev); + t->net = net; err = register_netdev(sitn->fb_tunnel_dev); if (err) @@ -1865,8 +1872,6 @@ static int __net_init sit_init_net(struct net *net) ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - t = netdev_priv(sitn->fb_tunnel_dev); - strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2debdf085a3b..b03c223eda4f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -376,7 +376,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); struct request_sock *fastopen; struct ipv6_pinfo *np; struct tcp_sock *tp; @@ -798,6 +798,8 @@ static void tcp_v6_init_req(struct request_sock *req, ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + ireq->ir_rmt_addr = LOOPBACK4_IPV6; + ireq->ir_loc_addr = LOOPBACK4_IPV6; /* So that link locals have meaning */ if ((!sk_listener->sk_bound_dev_if || l3_slave) && @@ -864,16 +866,16 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 int oif, int rst, u8 tclass, __be32 label, u32 priority, u32 txhash, struct tcp_key *key) { - const struct tcphdr *th = tcp_hdr(skb); - struct tcphdr *t1; - struct sk_buff *buff; - struct flowi6 fl6; - struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); - struct sock *ctl_sk = net->ipv6.tcp_sk; + struct net *net = sk ? 
sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); unsigned int tot_len = sizeof(struct tcphdr); + struct sock *ctl_sk = net->ipv6.tcp_sk; + const struct tcphdr *th = tcp_hdr(skb); __be32 mrst = 0, *topt; struct dst_entry *dst; - __u32 mark = 0; + struct sk_buff *buff; + struct tcphdr *t1; + struct flowi6 fl6; + u32 mark = 0; if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; @@ -997,7 +999,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, - tclass & ~INET_ECN_MASK, priority); + tclass, priority); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS(net, TCP_MIB_OUTRSTS); @@ -1039,7 +1041,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, if (!sk && !ipv6_unicast_destination(skb)) return; - net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); + net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) return; @@ -1133,7 +1135,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, trace_tcp_send_reset(sk, skb, reason); tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, - ipv6_get_dsfield(ipv6h), label, priority, txhash, + ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK, + label, priority, txhash, &key); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) @@ -1153,11 +1156,16 @@ static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, tclass, label, priority, txhash, key); } -static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) +static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb, + enum tcp_tw_status tw_status) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + u8 tclass = tw->tw_tclass; struct tcp_key key = {}; + + if (tw_status == TCP_TW_ACK_OOW) + tclass &= 
~INET_ECN_MASK; #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; @@ -1201,7 +1209,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, - &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), + &key, tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); #ifdef CONFIG_TCP_AO @@ -1277,8 +1285,9 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_nxt, tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), - READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, - &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0, + req->ts_recent, sk->sk_bound_dev_if, + &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK, + 0, READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); if (tcp_key_is_ao(&key)) @@ -1451,10 +1460,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * ip6_dst_store(newsk, dst, NULL, NULL); - newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; - newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; - newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... 
@@ -1507,9 +1513,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_initialize_rcv_mss(newsk); - newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; - newinet->inet_rcv_saddr = LOOPBACK4_IPV6; - #ifdef CONFIG_TCP_MD5SIG l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); @@ -1735,7 +1738,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->has_rxtstamp = @@ -1744,7 +1747,9 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { + struct net *net = dev_net_rcu(skb->dev); enum skb_drop_reason drop_reason; + enum tcp_tw_status tw_status; int sdif = inet6_sdif(skb); int dif = inet6_iif(skb); const struct tcphdr *th; @@ -1753,7 +1758,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) bool refcounted; int ret; u32 isn; - struct net *net = dev_net(skb->dev); drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) @@ -1832,7 +1836,8 @@ lookup: th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); - nsk = tcp_check_req(sk, skb, req, false, &req_stolen); + nsk = tcp_check_req(sk, skb, req, false, &req_stolen, + &drop_reason); } else { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } @@ -1965,7 +1970,8 @@ do_time_wait: goto csum_error; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) { + tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn); + switch (tw_status) { case TCP_TW_SYN: { struct sock *sk2; @@ -1990,7 +1996,8 @@ do_time_wait: /* to ACK */ fallthrough; 
case TCP_TW_ACK: - tcp_v6_timewait_ack(sk, skb); + case TCP_TW_ACK_OOW: + tcp_v6_timewait_ack(sk, skb, tw_status); break; case TCP_TW_RST: tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); @@ -2004,7 +2011,7 @@ do_time_wait: void tcp_v6_early_demux(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); const struct ipv6hdr *hdr; const struct tcphdr *th; struct sock *sk; @@ -2061,8 +2068,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = inet6_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in6), .mtu_reduced = tcp_v6_mtu_reduced, }; @@ -2095,8 +2100,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = inet6_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in6), .mtu_reduced = tcp_v4_mtu_reduced, }; @@ -2192,10 +2195,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk_timeout(icsk); } else if (icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk_timeout(icsk); } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index ae2da28f9dfb..d9b11fe41bf0 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -35,7 +35,7 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, inet6_get_iif_sdif(skb, &iif, &sdif); hdr = skb_gro_network_header(skb); - net = dev_net(skb->dev); + net = dev_net_rcu(skb->dev); sk = 
__inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c6ea438b5c75..024458ef163c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -586,7 +586,7 @@ csum_copy_err: SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS); SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); /* starting over for a new packet, but check if we need to yield */ cond_resched(); @@ -1494,11 +1494,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); - ipcm6_init(&ipc6); + ipcm6_init_sk(&ipc6, sk); ipc6.gso_size = READ_ONCE(up->gso_size); - ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); - ipc6.sockc.mark = READ_ONCE(sk->sk_mark); - ipc6.sockc.priority = READ_ONCE(sk->sk_priority); /* destination address check */ if (sin6) { @@ -1704,9 +1701,6 @@ do_udp_sendmsg: security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel); dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected); @@ -1752,8 +1746,6 @@ back_from_confirm: WRITE_ONCE(up->pending, AF_INET6); do_append_data: - if (ipc6.dontfrag < 0) - ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, dst_rt6_info(dst), diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b41152dd4246..404212dfc99a 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -117,7 +117,7 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport) { const struct ipv6hdr *iph = skb_gro_network_header(skb); - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); int iif, sdif; inet6_get_iif_sdif(skb, &iif, 
&sdif); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index e83691073496..cf0b66f4fb29 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -25,6 +25,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netdev_lock.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/udp.h> diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index f4c1da070826..b98d13584c81 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -547,7 +547,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_mark = READ_ONCE(sk->sk_mark); fl6.flowi6_uid = sk->sk_uid; - ipcm6_init(&ipc6); + ipcm6_init_sk(&ipc6, sk); if (lsa) { if (addr_len < SIN6_LEN_RFC2133) @@ -634,9 +634,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); @@ -648,9 +645,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (ipc6.dontfrag < 0) - ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); - if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 53baf2dd5d5d..fc5c2fd8f34c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -806,6 +806,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.private = sk; po->chan.ops = &pppol2tp_chan_ops; po->chan.mtu = pppol2tp_tunnel_mtu(tunnel); + po->chan.direct_xmit = true; error = ppp_register_net_channel(sock_net(sk), &po->chan); if (error) { diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f3fbe5a4395e..aeb99d102c6e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ 
-9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ /** @@ -206,17 +206,19 @@ u8 ieee80211_retrieve_addba_ext_data(struct sta_info *sta, elems = ieee802_11_parse_elems(elem_data, elem_len, true, NULL); - if (elems && !elems->parse_error && elems->addba_ext_ie) { - data = elems->addba_ext_ie->data; + if (!elems || elems->parse_error || !elems->addba_ext_ie) + goto free; - if (!sta->sta.deflink.eht_cap.has_eht || !buf_size) - goto free; + data = elems->addba_ext_ie->data; + if (buf_size && + (sta->sta.valid_links || sta->sta.deflink.eht_cap.has_eht)) { buf_size_1k = u8_get_bits(elems->addba_ext_ie->data, IEEE80211_ADDBA_EXT_BUF_SIZE_MASK); *buf_size |= (u16)buf_size_1k << IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT; } + free: kfree(elems); @@ -258,7 +260,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); - if (sta->sta.deflink.he_cap.has_he) + if (sta->sta.valid_links || sta->sta.deflink.he_cap.has_he) ieee80211_add_addbaext(skb, req_addba_ext_data, buf_size); ieee80211_tx_skb(sdata, skb); @@ -293,7 +295,8 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } - if (!sta->sta.deflink.ht_cap.ht_supported && + if (!sta->sta.valid_links && + !sta->sta.deflink.ht_cap.ht_supported && !sta->sta.deflink.he_cap.has_he) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o HT\n", @@ -309,7 +312,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } - if (sta->sta.deflink.eht_cap.has_eht) + if (sta->sta.valid_links || sta->sta.deflink.eht_cap.has_eht) max_buf_size = IEEE80211_MAX_AMPDU_BUF_EHT; else if (sta->sta.deflink.he_cap.has_he) max_buf_size = IEEE80211_MAX_AMPDU_BUF_HE; @@ 
-321,7 +324,8 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, * and if buffer size does not exceeds max value */ /* XXX: check own ht delayed BA capability?? */ if (((ba_policy != 1) && - (!(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || + (sta->sta.valid_links || + !(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || (buf_size > max_buf_size)) { status = WLAN_STATUS_INVALID_QOS_PARAM; ht_dbg_ratelimited(sta->sdata, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 61f2cac37728..63a5e48291ac 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2023 Intel Corporation + * Copyright (C) 2018 - 2024 Intel Corporation */ #include <linux/ieee80211.h> @@ -464,7 +464,9 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, sta->ampdu_mlme.addba_req_num[tid]++; spin_unlock_bh(&sta->lock); - if (sta->sta.deflink.eht_cap.has_eht) { + if (sta->sta.valid_links || + sta->sta.deflink.eht_cap.has_eht || + ieee80211_hw_check(&local->hw, STRICT)) { buf_size = local->hw.max_tx_aggregation_subframes; } else if (sta->sta.deflink.he_cap.has_he) { buf_size = min_t(u16, local->hw.max_tx_aggregation_subframes, @@ -608,7 +610,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, "Requested to start BA session on reserved tid=%d", tid)) return -EINVAL; - if (!pubsta->deflink.ht_cap.ht_supported && + if (!pubsta->valid_links && + !pubsta->deflink.ht_cap.ht_supported && !pubsta->deflink.vht_cap.vht_supported && !pubsta->deflink.he_cap.has_he && !pubsta->deflink.eht_cap.has_eht) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9351c64608a9..9f683f838431 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 
2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/ieee80211.h> @@ -89,15 +89,14 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, /* check flags first */ if (params->flags && ieee80211_sdata_running(sdata)) { - u32 mask = MONITOR_FLAG_COOK_FRAMES | MONITOR_FLAG_ACTIVE; + u32 mask = MONITOR_FLAG_ACTIVE; /* - * Prohibit MONITOR_FLAG_COOK_FRAMES and - * MONITOR_FLAG_ACTIVE to be changed while the - * interface is up. + * Prohibit MONITOR_FLAG_ACTIVE to be changed + * while the interface is up. * Else we would need to add a lot of cruft * to update everything: - * cooked_mntrs, monitor and all fif_* counters + * monitor and all fif_* counters * reconfigure hardware */ if ((params->flags & mask) != (sdata->u.mntr.flags & mask)) @@ -920,7 +919,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, - &chanreq.oper)) + &chanreq.oper)) return 0; sdata = wiphy_dereference(wiphy, local->monitor_sdata); @@ -929,7 +928,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, } if (rcu_access_pointer(sdata->deflink.conf->chanctx_conf) && - cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, + cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, &chanreq.oper)) return 0; @@ -1908,12 +1907,12 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, } if (params->supported_rates && - params->supported_rates_len) { - ieee80211_parse_bitrates(link->conf->chanreq.oper.width, - sband, params->supported_rates, - params->supported_rates_len, - &link_sta->pub->supp_rates[sband->band]); - } + params->supported_rates_len && + !ieee80211_parse_bitrates(link->conf->chanreq.oper.width, + sband, 
params->supported_rates, + params->supported_rates_len, + &link_sta->pub->supp_rates[sband->band])) + return -EINVAL; if (params->ht_capa) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, @@ -4371,9 +4370,8 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, if (chanctx_conf) { *chandef = link->conf->chanreq.oper; ret = 0; - } else if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) && - local->open_count > 0 && - local->open_count == local->monitors && + } else if (local->open_count > 0 && + local->open_count == local->virt_monitors && sdata->vif.type == NL80211_IFTYPE_MONITOR) { *chandef = local->monitor_chanreq.oper; ret = 0; @@ -5187,14 +5185,21 @@ ieee80211_set_ttlm(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_assoc_ml_reconf(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_assoc_link *add_links, - u16 rem_links) + struct cfg80211_ml_reconf_req *req) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ieee80211_mgd_assoc_ml_reconf(sdata, add_links, rem_links); + return ieee80211_mgd_assoc_ml_reconf(sdata, req); +} + +static int +ieee80211_set_epcs(struct wiphy *wiphy, struct net_device *dev, bool enable) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + return ieee80211_mgd_set_epcs(sdata, enable); } const struct cfg80211_ops mac80211_config_ops = { @@ -5312,4 +5317,5 @@ const struct cfg80211_ops mac80211_config_ops = { .set_ttlm = ieee80211_set_ttlm, .get_radio_mask = ieee80211_get_radio_mask, .assoc_ml_reconf = ieee80211_assoc_ml_reconf, + .set_epcs = ieee80211_set_epcs, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index dc28f2b0957a..c3bfac58151f 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * mac80211 - channel management - * Copyright 2020 - 2024 Intel Corporation + * Copyright 2020 - 2025 Intel Corporation */ #include 
<linux/nl80211.h> @@ -2178,3 +2178,21 @@ void ieee80211_iter_chan_contexts_atomic( rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_iter_chan_contexts_atomic); + +void ieee80211_iter_chan_contexts_mtx( + struct ieee80211_hw *hw, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *chanctx_conf, + void *data), + void *iter_data) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_chanctx *ctx; + + lockdep_assert_wiphy(hw->wiphy); + + list_for_each_entry(ctx, &local->chanctx_list, list) + if (ctx->driver_present) + iter(hw, &ctx->conf, iter_data); +} +EXPORT_SYMBOL_GPL(ieee80211_iter_chan_contexts_mtx); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index bf0a2902d93c..69e03630f64c 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -492,6 +492,7 @@ static const char *hw_flag_names[] = { FLAG(DISALLOW_PUNCTURING), FLAG(DISALLOW_PUNCTURING_5GHZ), FLAG(HANDLES_QUIET_CSA), + FLAG(STRICT), #undef FLAG }; @@ -524,6 +525,46 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, return rv; } +static ssize_t hwflags_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + char buf[100]; + int val; + + if (count >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(buf, user_buf, count)) + return -EFAULT; + + if (count && buf[count - 1] == '\n') + buf[count - 1] = '\0'; + else + buf[count] = '\0'; + + if (sscanf(buf, "strict=%d", &val) == 1) { + switch (val) { + case 0: + ieee80211_hw_set(&local->hw, STRICT); + return count; + case 1: + __clear_bit(IEEE80211_HW_STRICT, local->hw.flags); + return count; + default: + return -EINVAL; + } + } + + return -EINVAL; +} + +static const struct file_operations hwflags_ops = { + .open = simple_open, + .read = hwflags_read, + .write = hwflags_write, +}; + static ssize_t misc_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { @@ -574,7 
+615,6 @@ static ssize_t queues_read(struct file *file, char __user *user_buf, return simple_read_from_buffer(user_buf, count, ppos, buf, res); } -DEBUGFS_READONLY_FILE_OPS(hwflags); DEBUGFS_READONLY_FILE_OPS(queues); DEBUGFS_READONLY_FILE_OPS(misc); @@ -651,7 +691,7 @@ void debugfs_hw_add(struct ieee80211_local *local) #ifdef CONFIG_PM DEBUGFS_ADD_MODE(reset, 0200); #endif - DEBUGFS_ADD(hwflags); + DEBUGFS_ADD_MODE(hwflags, 0600); DEBUGFS_ADD(user_power); DEBUGFS_ADD(power); DEBUGFS_ADD(hw_conf); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a67a9d316008..a8948f4d983e 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -457,11 +457,12 @@ static ssize_t link_sta_addr_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { struct link_sta_info *link_sta = file->private_data; - u8 mac[3 * ETH_ALEN + 1]; + u8 mac[MAC_ADDR_STR_LEN + 2]; snprintf(mac, sizeof(mac), "%pM\n", link_sta->pub->addr); - return simple_read_from_buffer(userbuf, count, ppos, mac, 3 * ETH_ALEN); + return simple_read_from_buffer(userbuf, count, ppos, mac, + MAC_ADDR_STR_LEN + 1); } LINK_STA_OPS(addr); @@ -1240,7 +1241,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; struct dentry *stations_dir = sta->sdata->debugfs.subdir_stations; - u8 mac[3*ETH_ALEN]; + u8 mac[MAC_ADDR_STR_LEN + 1]; if (!stations_dir) return; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5acecc7bd4a9..307587c8a003 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016 Intel Deutschland GmbH -* Copyright (C) 2018-2019, 2021-2024 Intel Corporation +* Copyright (C) 2018-2019, 2021-2025 Intel Corporation */ #ifndef __MAC80211_DRIVER_OPS @@ -955,6 +955,7 @@ static inline void drv_mgd_complete_tx(struct ieee80211_local *local, return; 
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); + info->link_id = info->link_id < 0 ? 0 : info->link_id; trace_drv_mgd_complete_tx(local, sdata, info->duration, info->subtype, info->success); if (local->ops->mgd_complete_tx) diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h index 59e3ec4dc960..eb9ab310f91c 100644 --- a/net/mac80211/drop.h +++ b/net/mac80211/drop.h @@ -11,12 +11,6 @@ typedef unsigned int __bitwise ieee80211_rx_result; -#define MAC80211_DROP_REASONS_MONITOR(R) \ - R(RX_DROP_M_UNEXPECTED_4ADDR_FRAME) \ - R(RX_DROP_M_BAD_BCN_KEYIDX) \ - R(RX_DROP_M_BAD_MGMT_KEYIDX) \ -/* this line for the trailing \ - add before this */ - #define MAC80211_DROP_REASONS_UNUSABLE(R) \ /* 0x00 == ___RX_DROP_UNUSABLE */ \ R(RX_DROP_U_MIC_FAIL) \ @@ -66,6 +60,10 @@ typedef unsigned int __bitwise ieee80211_rx_result; R(RX_DROP_U_UNEXPECTED_STA_4ADDR) \ R(RX_DROP_U_UNEXPECTED_VLAN_MCAST) \ R(RX_DROP_U_NOT_PORT_CONTROL) \ + R(RX_DROP_U_UNEXPECTED_4ADDR_FRAME) \ + R(RX_DROP_U_BAD_BCN_KEYIDX) \ + /* 0x30 */ \ + R(RX_DROP_U_BAD_MGMT_KEYIDX) \ R(RX_DROP_U_UNKNOWN_ACTION_REJECTED) \ /* this line for the trailing \ - add before this */ @@ -78,10 +76,6 @@ enum ___mac80211_drop_reason { ___RX_QUEUED = SKB_NOT_DROPPED_YET, #define ENUM(x) ___ ## x, - ___RX_DROP_MONITOR = SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR << - SKB_DROP_REASON_SUBSYS_SHIFT, - MAC80211_DROP_REASONS_MONITOR(ENUM) - ___RX_DROP_UNUSABLE = SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE << SKB_DROP_REASON_SUBSYS_SHIFT, MAC80211_DROP_REASONS_UNUSABLE(ENUM) @@ -89,11 +83,10 @@ enum ___mac80211_drop_reason { }; enum mac80211_drop_reason { - RX_CONTINUE = (__force ieee80211_rx_result)___RX_CONTINUE, - RX_QUEUED = (__force ieee80211_rx_result)___RX_QUEUED, - RX_DROP_MONITOR = (__force ieee80211_rx_result)___RX_DROP_MONITOR, + RX_CONTINUE = (__force ieee80211_rx_result)___RX_CONTINUE, + RX_QUEUED = (__force ieee80211_rx_result)___RX_QUEUED, + RX_DROP = (__force ieee80211_rx_result)___RX_DROP_UNUSABLE, #define DEF(x) x = 
(__force ieee80211_rx_result)___ ## x, - MAC80211_DROP_REASONS_MONITOR(DEF) MAC80211_DROP_REASONS_UNUSABLE(DEF) #undef DEF }; diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 42f7ee142ce3..0397755a3bd1 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -158,7 +158,7 @@ do_survey: if (chanctx_conf) channel = chanctx_conf->def.chan; else if (local->open_count > 0 && - local->open_count == local->monitors && + local->open_count == local->virt_monitors && sdata->vif.type == NL80211_IFTYPE_MONITOR) channel = local->monitor_chanreq.oper.chan; else diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e7dc3f0cfc9a..fb05f3cd37ec 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #ifndef IEEE80211_I_H @@ -200,7 +200,6 @@ enum ieee80211_packet_rx_flags { /** * enum ieee80211_rx_flags - RX data flags * - * @IEEE80211_RX_CMNTR: received on cooked monitor already * @IEEE80211_RX_BEACON_REPORTED: This frame was already reported * to cfg80211_report_obss_beacon(). * @@ -208,8 +207,7 @@ enum ieee80211_packet_rx_flags { * for a single frame. 
*/ enum ieee80211_rx_flags { - IEEE80211_RX_CMNTR = BIT(0), - IEEE80211_RX_BEACON_REPORTED = BIT(1), + IEEE80211_RX_BEACON_REPORTED = BIT(0), }; struct ieee80211_rx_data { @@ -446,8 +444,6 @@ struct ieee80211_mgd_assoc_data { const u8 *supp_rates; u8 supp_rates_len; - unsigned long userspace_selectors[BITS_TO_LONGS(128)]; - unsigned long timeout; int tries; @@ -462,7 +458,9 @@ struct ieee80211_mgd_assoc_data { bool s1g; bool spp_amsdu; - unsigned int assoc_link_id; + s8 assoc_link_id; + + __le16 ext_mld_capa_ops; u8 fils_nonces[2 * FILS_NONCE_LEN]; u8 fils_kek[FILS_MAX_KEK_LEN]; @@ -524,6 +522,8 @@ struct ieee80211_if_managed { struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; + unsigned long userspace_selectors[BITS_TO_LONGS(128)]; + bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off powersave */ @@ -615,6 +615,12 @@ struct ieee80211_if_managed { u16 added_links; u8 dialog_token; } reconf; + + /* Support for epcs */ + struct { + bool enabled; + u8 dialog_token; + } epcs; }; struct ieee80211_if_ibss { @@ -1380,7 +1386,7 @@ struct ieee80211_local { spinlock_t queue_stop_reason_lock; int open_count; - int monitors, cooked_mntrs, tx_mntrs; + int monitors, virt_monitors, tx_mntrs; /* number of interfaces with corresponding FIF_ flags */ int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll, fif_probe_req; @@ -1492,7 +1498,7 @@ struct ieee80211_local { /* see iface.c */ struct list_head interfaces; - struct list_head mon_list; /* only that are IFF_UP && !cooked */ + struct list_head mon_list; /* only that are IFF_UP */ struct mutex iflist_mtx; /* Scanning and BSS list */ @@ -2090,8 +2096,7 @@ struct sk_buff * ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, - int retry_count, bool send_to_cooked, - struct ieee80211_tx_status *status); + 
int retry_count, struct ieee80211_tx_status *status); void ieee80211_check_fast_xmit(struct sta_info *sta); void ieee80211_check_fast_xmit_all(struct ieee80211_local *local); @@ -2774,14 +2779,19 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); int ieee80211_req_neg_ttlm(struct ieee80211_sub_if_data *sdata, struct cfg80211_ttlm_params *params); +void ieee80211_process_ttlm_teardown(struct ieee80211_sub_if_data *sdata); void ieee80211_check_wbrf_support(struct ieee80211_local *local); void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef); void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef); +int ieee80211_mgd_set_epcs(struct ieee80211_sub_if_data *sdata, bool enable); +void ieee80211_process_epcs_ena_resp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len); +void ieee80211_process_epcs_teardown(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len); int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, - struct cfg80211_assoc_link *add_links, - u16 rem_links); + struct cfg80211_ml_reconf_req *req); void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); @@ -2795,6 +2805,13 @@ int ieee80211_calc_chandef_subchan_offset(const struct cfg80211_chan_def *ap, void ieee80211_rearrange_tpe_psd(struct ieee80211_parsed_tpe_psd *psd, const struct cfg80211_chan_def *ap, const struct cfg80211_chan_def *used); +struct ieee802_11_elems * +ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, + struct ieee80211_conn_settings *conn, + struct cfg80211_bss *cbss, int link_id, + struct ieee80211_chan_req *chanreq, + struct cfg80211_chan_def *ap_chandef, + unsigned long *userspace_selectors); #else #define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym) #define VISIBLE_IF_MAC80211_KUNIT static diff --git 
a/net/mac80211/iface.c b/net/mac80211/iface.c index 738de269e13f..b0423046028c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -8,7 +8,7 @@ * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/slab.h> #include <linux/kernel.h> @@ -483,8 +483,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_ibss_stop(sdata); break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) - break; list_del_rcu(&sdata->u.mntr.list); break; default: @@ -584,18 +582,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { - local->cooked_mntrs--; - break; - } - local->monitors--; - if (local->monitors == 0) { - local->hw.conf.flags &= ~IEEE80211_CONF_MONITOR; - hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; - } - ieee80211_adjust_monitor_flags(sdata, -1); + if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { + + local->virt_monitors--; + if (local->virt_monitors == 0) { + local->hw.conf.flags &= ~IEEE80211_CONF_MONITOR; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; + } + + ieee80211_adjust_monitor_flags(sdata, -1); + } break; case NL80211_IFTYPE_NAN: /* clean all the functions */ @@ -686,7 +685,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do case NL80211_IFTYPE_AP_VLAN: break; case NL80211_IFTYPE_MONITOR: - if (local->monitors == 0) + if (local->virt_monitors == 0) ieee80211_del_virtual_monitor(local); ieee80211_recalc_idle(local); @@ -723,7 +722,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do 
ieee80211_configure_filter(local); ieee80211_hw_config(local, hw_reconf_flags); - if (local->monitors == local->open_count) + if (local->virt_monitors == local->open_count) ieee80211_add_virtual_monitor(local); } @@ -807,6 +806,9 @@ static void ieee80211_set_multicast_list(struct net_device *dev) */ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { + if (WARN_ON(!list_empty(&sdata->work.entry))) + wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->work); + /* free extra data */ ieee80211_free_keys(sdata, false); @@ -979,7 +981,7 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat local->hw.wiphy->frag_threshold != (u32)-1) flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED; - if (local->monitors) + if (local->virt_monitors) flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED; } else { flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED; @@ -989,7 +991,7 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) { flags |= IEEE80211_OFFLOAD_DECAP_ENABLED; - if (local->monitors && + if (local->virt_monitors && !ieee80211_hw_check(&local->hw, SUPPORTS_CONC_MON_RX_DECAP)) flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED; } else { @@ -1327,28 +1329,27 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) } break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { - local->cooked_mntrs++; - break; - } - if ((sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) || ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { res = drv_add_interface(local, sdata); if (res) goto err_stop; - } else if (local->monitors == 0 && local->open_count == 0) { - res = ieee80211_add_virtual_monitor(local); - if (res) - goto err_stop; + } else { + if (local->virt_monitors == 0 && local->open_count == 0) { + res = ieee80211_add_virtual_monitor(local); + if (res) + goto err_stop; + } + local->virt_monitors++; + + /* must be before the call to 
ieee80211_configure_filter */ + if (local->virt_monitors == 1) { + local->hw.conf.flags |= IEEE80211_CONF_MONITOR; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; + } } - /* must be before the call to ieee80211_configure_filter */ local->monitors++; - if (local->monitors == 1) { - local->hw.conf.flags |= IEEE80211_CONF_MONITOR; - hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; - } ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); @@ -1424,8 +1425,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) rcu_assign_pointer(local->p2p_sdata, sdata); break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) - break; list_add_tail_rcu(&sdata->u.mntr.list, &local->mon_list); break; default: @@ -1561,10 +1560,21 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, ieee80211_process_neg_ttlm_res(sdata, mgmt, skb->len); break; + case WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN: + ieee80211_process_ttlm_teardown(sdata); + break; case WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_RESP: ieee80211_process_ml_reconf_resp(sdata, mgmt, skb->len); break; + case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_RESP: + ieee80211_process_epcs_ena_resp(sdata, mgmt, + skb->len); + break; + case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN: + ieee80211_process_epcs_teardown(sdata, mgmt, + skb->len); + break; default: break; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 53e5aee46885..741e6c7edcb7 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1744,18 +1744,7 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); - -static const char * const drop_reasons_monitor[] = { -#define V(x) #x, - [0] = "RX_DROP_MONITOR", - MAC80211_DROP_REASONS_MONITOR(V) -}; - -static struct drop_reason_list drop_reason_list_monitor = { - .reasons = drop_reasons_monitor, - .n_reasons = ARRAY_SIZE(drop_reasons_monitor), -}; - +#define V(x) #x, 
static const char * const drop_reasons_unusable[] = { [0] = "RX_DROP_UNUSABLE", MAC80211_DROP_REASONS_UNUSABLE(V) @@ -1784,8 +1773,6 @@ static int __init ieee80211_init(void) if (ret) goto err_netdev; - drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR, - &drop_reason_list_monitor); drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE, &drop_reason_list_unusable); @@ -1804,7 +1791,6 @@ static void __exit ieee80211_exit(void) ieee80211_iface_exit(); - drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR); drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE); rcu_barrier(); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 4e9546e998b6..c94a9c7ca960 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -367,6 +367,12 @@ u32 airtime_link_metric_get(struct ieee80211_local *local, return (u32)result; } +/* Check that the first metric is at least 10% better than the second one */ +static bool is_metric_better(u32 x, u32 y) +{ + return (x < y) && (x < (y - x / 10)); +} + /** * hwmp_route_info_get - Update routing info to originator and transmitter * @@ -458,8 +464,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, (mpath->sn == orig_sn && (rcu_access_pointer(mpath->next_hop) != sta ? - mult_frac(new_metric, 10, 9) : - new_metric) >= mpath->metric)) { + !is_metric_better(new_metric, mpath->metric) : + new_metric >= mpath->metric))) { process = false; fresh_info = false; } @@ -533,8 +539,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if ((mpath->flags & MESH_PATH_FIXED) || ((mpath->flags & MESH_PATH_ACTIVE) && ((rcu_access_pointer(mpath->next_hop) != sta ? 
- mult_frac(last_hop_metric, 10, 9) : - last_hop_metric) > mpath->metric))) + !is_metric_better(last_hop_metric, mpath->metric) : + last_hop_metric > mpath->metric)))) fresh_info = false; } else { mpath = mesh_path_add(sdata, ta); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 36a9be9a66c8..c010bb3d24e3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2024 Intel Corporation + * Copyright (C) 2018 - 2025 Intel Corporation */ #include <linux/delay.h> @@ -168,6 +168,9 @@ ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata, bool no_vht = false; u32 ht_cfreq; + if (ieee80211_hw_check(&sdata->local->hw, STRICT)) + ignore_ht_channel_mismatch = false; + *chandef = (struct cfg80211_chan_def) { .chan = channel, .width = NL80211_CHAN_WIDTH_20_NOHT, @@ -343,6 +346,115 @@ ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata, } static bool +ieee80211_verify_sta_ht_mcs_support(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_ht_operation *ht_op) +{ + struct ieee80211_sta_ht_cap sta_ht_cap; + int i; + + if (sband->band == NL80211_BAND_6GHZ) + return true; + + if (!ht_op) + return false; + + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + + /* + * P802.11REVme/D7.0 - 6.5.4.2.4 + * ... + * If the MLME of an HT STA receives an MLME-JOIN.request primitive + * with the SelectedBSS parameter containing a Basic HT-MCS Set field + * in the HT Operation parameter that contains any unsupported MCSs, + * the MLME response in the resulting MLME-JOIN.confirm primitive shall + * contain a ResultCode parameter that is not set to the value SUCCESS. + * ... 
+ */ + + /* Simply check that all basic rates are in the STA RX mask */ + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { + if ((ht_op->basic_set[i] & sta_ht_cap.mcs.rx_mask[i]) != + ht_op->basic_set[i]) + return false; + } + + return true; +} + +static bool +ieee80211_verify_sta_vht_mcs_support(struct ieee80211_sub_if_data *sdata, + int link_id, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_operation *vht_op) +{ + struct ieee80211_sta_vht_cap sta_vht_cap; + u16 ap_min_req_set, sta_rx_mcs_map, sta_tx_mcs_map; + int nss; + + if (sband->band != NL80211_BAND_5GHZ) + return true; + + if (!vht_op) + return false; + + memcpy(&sta_vht_cap, &sband->vht_cap, sizeof(sta_vht_cap)); + ieee80211_apply_vhtcap_overrides(sdata, &sta_vht_cap); + + ap_min_req_set = le16_to_cpu(vht_op->basic_mcs_set); + sta_rx_mcs_map = le16_to_cpu(sta_vht_cap.vht_mcs.rx_mcs_map); + sta_tx_mcs_map = le16_to_cpu(sta_vht_cap.vht_mcs.tx_mcs_map); + + /* + * Many APs are incorrectly advertising an all-zero value here, + * which really means MCS 0-7 are required for 1-8 streams, but + * they don't really mean it that way. + * Some other APs are incorrectly advertising 3 spatial streams + * with MCS 0-7 are required, but don't really mean it that way + * and we'll connect only with HT, rather than even HE. + * As a result, unfortunately the VHT basic MCS/NSS set cannot + * be used at all, so check it only in strict mode. + */ + if (!ieee80211_hw_check(&sdata->local->hw, STRICT)) + return true; + + /* + * P802.11REVme/D7.0 - 6.5.4.2.4 + * ... + * If the MLME of a VHT STA receives an MLME-JOIN.request primitive + * with a SelectedBSS parameter containing a Basic VHT-MCS And NSS Set + * field in the VHT Operation parameter that contains any unsupported + * <VHT-MCS, NSS> tuple, the MLME response in the resulting + * MLME-JOIN.confirm primitive shall contain a ResultCode parameter + * that is not set to the value SUCCESS. + * ... 
+ */ + for (nss = 8; nss > 0; nss--) { + u8 ap_op_val = (ap_min_req_set >> (2 * (nss - 1))) & 3; + u8 sta_rx_val; + u8 sta_tx_val; + + if (ap_op_val == IEEE80211_HE_MCS_NOT_SUPPORTED) + continue; + + sta_rx_val = (sta_rx_mcs_map >> (2 * (nss - 1))) & 3; + sta_tx_val = (sta_tx_mcs_map >> (2 * (nss - 1))) & 3; + + if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || + sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || + sta_rx_val < ap_op_val || sta_tx_val < ap_op_val) { + link_id_info(sdata, link_id, + "Missing mandatory rates for %d Nss, rx %d, tx %d oper %d, disable VHT\n", + nss, sta_rx_val, sta_tx_val, ap_op_val); + return false; + } + } + + return true; +} + +static bool ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata, int link_id, const struct ieee80211_he_cap_elem *he_cap, @@ -388,7 +500,7 @@ ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata, * zeroes, which is nonsense, and completely inconsistent with itself * (it doesn't have 8 streams). Accept the settings in this case anyway. */ - if (!ap_min_req_set) + if (!ieee80211_hw_check(&sdata->local->hw, STRICT) && !ap_min_req_set) return true; /* make sure the AP is consistent with itself @@ -448,7 +560,7 @@ ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata, * zeroes, which is nonsense, and completely inconsistent with itself * (it doesn't have 8 streams). Accept the settings in this case anyway. 
*/ - if (!ap_min_req_set) + if (!ieee80211_hw_check(&sdata->local->hw, STRICT) && !ap_min_req_set) return true; /* Need to go over for 80MHz, 160MHz and for 80+80 */ @@ -676,7 +788,7 @@ static int ieee80211_chandef_num_subchans(const struct cfg80211_chan_def *c) if (c->width == NL80211_CHAN_WIDTH_80P80) return 4 + 4; - return nl80211_chan_width_to_mhz(c->width) / 20; + return cfg80211_chandef_get_width(c) / 20; } static int ieee80211_chandef_num_widths(const struct cfg80211_chan_def *c) @@ -877,7 +989,7 @@ static void ieee80211_set_chanreq_ap(struct ieee80211_sub_if_data *sdata, chanreq->ap = *ap_chandef; } -static struct ieee802_11_elems * +VISIBLE_IF_MAC80211_KUNIT struct ieee802_11_elems * ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, struct cfg80211_bss *cbss, int link_id, @@ -1039,6 +1151,26 @@ again: link_id_info(sdata, link_id, "regulatory prevented using AP config, downgraded\n"); + if (conn->mode >= IEEE80211_CONN_MODE_HT && + !ieee80211_verify_sta_ht_mcs_support(sdata, sband, + elems->ht_operation)) { + conn->mode = IEEE80211_CONN_MODE_LEGACY; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; + link_id_info(sdata, link_id, + "required MCSes not supported, disabling HT\n"); + } + + if (conn->mode >= IEEE80211_CONN_MODE_VHT && + !ieee80211_verify_sta_vht_mcs_support(sdata, link_id, sband, + elems->vht_operation)) { + conn->mode = IEEE80211_CONN_MODE_HT; + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_40); + link_id_info(sdata, link_id, + "required MCSes not supported, disabling VHT\n"); + } + if (conn->mode >= IEEE80211_CONN_MODE_HE && (!ieee80211_verify_peer_he_mcs_support(sdata, link_id, (void *)elems->he_cap, @@ -1082,6 +1214,7 @@ free: kfree(elems); return ERR_PTR(ret); } +EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_determine_chan_mode); static int ieee80211_config_bw(struct ieee80211_link_data *link, struct ieee802_11_elems *elems, @@ -1313,13 
+1446,15 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, * Some APs apparently get confused if our capabilities are better * than theirs, so restrict what we advertise in the assoc request. */ - if (!(ap_vht_cap->vht_cap_info & - cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE))) - cap &= ~(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | - IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); - else if (!(ap_vht_cap->vht_cap_info & - cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE))) - cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; + if (!ieee80211_hw_check(&local->hw, STRICT)) { + if (!(ap_vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE))) + cap &= ~(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | + IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); + else if (!(ap_vht_cap->vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE))) + cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; + } /* * If some other vif is using the MU-MIMO capability we cannot associate @@ -1361,14 +1496,16 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, return mu_mimo_owner; } -static void ieee80211_assoc_add_rates(struct sk_buff *skb, +static void ieee80211_assoc_add_rates(struct ieee80211_local *local, + struct sk_buff *skb, enum nl80211_chan_width width, struct ieee80211_supported_band *sband, struct ieee80211_mgd_assoc_data *assoc_data) { u32 rates; - if (assoc_data->supp_rates_len) { + if (assoc_data->supp_rates_len && + !ieee80211_hw_check(&local->hw, STRICT)) { /* * Get all rates supported by the device and the AP as * some APs don't like getting a superset of their rates @@ -1584,7 +1721,7 @@ ieee80211_add_link_elems(struct ieee80211_sub_if_data *sdata, *capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; if (sband->band != NL80211_BAND_S1GHZ) - ieee80211_assoc_add_rates(skb, width, sband, assoc_data); + ieee80211_assoc_add_rates(local, skb, width, sband, assoc_data); if (*capab & WLAN_CAPABILITY_SPECTRUM_MGMT || *capab & 
WLAN_CAPABILITY_RADIO_MEASURE) { @@ -1806,6 +1943,21 @@ ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata, } skb_put_data(skb, &mld_capa_ops, sizeof(mld_capa_ops)); + /* Many APs have broken parsing of the extended MLD capa/ops field, + * dropping (re-)association request frames or replying with association + * response with a failure status if it's present. Without a clear + * indication as to whether the AP supports parsing this field or not do + * not include it in the common information unless strict mode is set. + */ + if (ieee80211_hw_check(&local->hw, STRICT) && + assoc_data->ext_mld_capa_ops) { + ml_elem->control |= + cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP); + common->len += 2; + skb_put_data(skb, &assoc_data->ext_mld_capa_ops, + sizeof(assoc_data->ext_mld_capa_ops)); + } + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { u16 link_present_elems[PRESENT_ELEMS_MAX] = {}; const u8 *extra_elems; @@ -1975,6 +2127,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) /* max common info field in basic multi-link element */ size += sizeof(struct ieee80211_mle_basic_common_info) + 2 + /* capa & op */ + 2 + /* ext capa & op */ 2; /* EML capa */ /* @@ -2051,7 +2204,8 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) * for some reason check it and want it to be set, set the bit for all * pre-EHT connections as we used to do. 
*/ - if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT) + if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT && + !ieee80211_hw_check(&local->hw, STRICT)) capab |= WLAN_CAPABILITY_ESS; /* add the elements for the assoc (main) link */ @@ -3375,10 +3529,10 @@ void ieee80211_mgd_set_link_qos_params(struct ieee80211_link_data *link) /* MLME */ static bool -ieee80211_sta_wmm_params(struct ieee80211_local *local, - struct ieee80211_link_data *link, - const u8 *wmm_param, size_t wmm_param_len, - const struct ieee80211_mu_edca_param_set *mu_edca) +_ieee80211_sta_wmm_params(struct ieee80211_local *local, + struct ieee80211_link_data *link, + const u8 *wmm_param, size_t wmm_param_len, + const struct ieee80211_mu_edca_param_set *mu_edca) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS]; @@ -3507,6 +3661,19 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local, for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) link->tx_conf[ac] = params[ac]; + return true; +} + +static bool +ieee80211_sta_wmm_params(struct ieee80211_local *local, + struct ieee80211_link_data *link, + const u8 *wmm_param, size_t wmm_param_len, + const struct ieee80211_mu_edca_param_set *mu_edca) +{ + if (!_ieee80211_sta_wmm_params(local, link, wmm_param, wmm_param_len, + mu_edca)) + return false; + ieee80211_mgd_set_link_qos_params(link); /* enable WMM or activate new settings */ @@ -3779,8 +3946,34 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->associated = false; + if (tx) { + bool tx_link_found = false; + + for (link_id = 0; + link_id < ARRAY_SIZE(sdata->link); + link_id++) { + struct ieee80211_link_data *link; + + if (!ieee80211_vif_link_active(&sdata->vif, link_id)) + continue; + + link = sdata_dereference(sdata->link[link_id], sdata); + if (WARN_ON_ONCE(!link)) + continue; + + if (link->u.mgd.csa.blocked_tx) + continue; + + tx_link_found = true; + break; + } + + tx = tx_link_found; + } + /* other links will 
be destroyed */ sdata->deflink.conf->bss = NULL; + sdata->deflink.conf->epcs_support = false; sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; netif_carrier_off(sdata->dev); @@ -3808,23 +4001,24 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, * insist sending these frames which can take time and delay * the disconnection and possible the roaming. */ - if (tx) - ieee80211_flush_queues(local, sdata, true); + ieee80211_flush_queues(local, sdata, true); - /* deauthenticate/disassociate now */ - if (tx || frame_buf) { + if (tx) { drv_mgd_prepare_tx(sdata->local, sdata, &info); ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr, sdata->vif.cfg.ap_addr, stype, - reason, tx, frame_buf); - } + reason, true, frame_buf); - /* flush out frame - make sure the deauth was actually sent */ - if (tx) + /* flush out frame - make sure the deauth was actually sent */ ieee80211_flush_queues(local, sdata, false); - drv_mgd_complete_tx(sdata->local, sdata, &info); + drv_mgd_complete_tx(sdata->local, sdata, &info); + } else if (frame_buf) { + ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr, + sdata->vif.cfg.ap_addr, stype, + reason, false, frame_buf); + } /* clear AP addr only after building the needed mgmt frames */ eth_zero_addr(sdata->deflink.u.mgd.bssid); @@ -3958,15 +4152,21 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, wiphy_work_cancel(sdata->local->hw.wiphy, &ifmgd->teardown_ttlm_work); - ieee80211_vif_set_links(sdata, 0, 0); - - ifmgd->mcast_seq_last = IEEE80211_SN_MODULO; - /* if disconnection happens in the middle of the ML reconfiguration * flow, cfg80211 must called to release the BSS references obtained * when the flow started. 
*/ ieee80211_ml_reconf_reset(sdata); + + ieee80211_vif_set_links(sdata, 0, 0); + + ifmgd->mcast_seq_last = IEEE80211_SN_MODULO; + + ifmgd->epcs.enabled = false; + ifmgd->epcs.dialog_token = 0; + + memset(ifmgd->userspace_selectors, 0, + sizeof(ifmgd->userspace_selectors)); } static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) @@ -4247,33 +4447,12 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - bool tx = false; lockdep_assert_wiphy(local->hw.wiphy); if (!ifmgd->associated) return; - /* only transmit if we have a link that makes that worthwhile */ - for (unsigned int link_id = 0; - link_id < ARRAY_SIZE(sdata->link); - link_id++) { - struct ieee80211_link_data *link; - - if (!ieee80211_vif_link_active(&sdata->vif, link_id)) - continue; - - link = sdata_dereference(sdata->link[link_id], sdata); - if (WARN_ON_ONCE(!link)) - continue; - - if (link->u.mgd.csa.blocked_tx) - continue; - - tx = true; - break; - } - if (!ifmgd->driver_disconnect) { unsigned int link_id; @@ -4290,7 +4469,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) struct ieee80211_link_data *link; link = sdata_dereference(sdata->link[link_id], sdata); - if (!link) + if (!link || !link->conf->bss) continue; cfg80211_unlink_bss(local->hw.wiphy, link->conf->bss); link->conf->bss = NULL; @@ -4301,14 +4480,14 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ifmgd->driver_disconnect ? 
WLAN_REASON_DEAUTH_LEAVING : WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - tx, frame_buf); + true, frame_buf); /* the other links will be destroyed */ sdata->vif.bss_conf.csa_active = false; sdata->deflink.u.mgd.csa.waiting_bcn = false; sdata->deflink.u.mgd.csa.blocked_tx = false; ieee80211_vif_unblock_queues_csa(sdata); - ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx, + ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, ifmgd->reconnect); ifmgd->reconnect = false; @@ -4570,6 +4749,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); + info.link_id = ifmgd->auth_data->link_id; + if (auth_alg != ifmgd->auth_data->algorithm || (auth_alg != WLAN_AUTH_SAE && auth_transaction != ifmgd->auth_data->expected_transaction) || @@ -4835,6 +5016,82 @@ static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata, IEEE80211_HE_MAC_CAP2_BCAST_TWT); } +static void ieee80211_epcs_changed(struct ieee80211_sub_if_data *sdata, + bool enabled) +{ + /* in any case this is called, dialog token should be reset */ + sdata->u.mgd.epcs.dialog_token = 0; + + if (sdata->u.mgd.epcs.enabled == enabled) + return; + + sdata->u.mgd.epcs.enabled = enabled; + cfg80211_epcs_changed(sdata->dev, enabled); +} + +static void ieee80211_epcs_teardown(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u8 link_id; + + if (!sdata->u.mgd.epcs.enabled) + return; + + lockdep_assert_wiphy(local->hw.wiphy); + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + struct ieee802_11_elems *elems; + struct ieee80211_link_data *link; + const struct cfg80211_bss_ies *ies; + bool ret; + + rcu_read_lock(); + + link = sdata_dereference(sdata->link[link_id], sdata); + if (!link || !link->conf || !link->conf->bss) { + rcu_read_unlock(); + 
continue; + } + + if (link->u.mgd.disable_wmm_tracking) { + rcu_read_unlock(); + ieee80211_set_wmm_default(link, false, false); + continue; + } + + ies = rcu_dereference(link->conf->bss->beacon_ies); + if (!ies) { + rcu_read_unlock(); + ieee80211_set_wmm_default(link, false, false); + continue; + } + + elems = ieee802_11_parse_elems(ies->data, ies->len, false, + NULL); + if (!elems) { + rcu_read_unlock(); + ieee80211_set_wmm_default(link, false, false); + continue; + } + + ret = _ieee80211_sta_wmm_params(local, link, + elems->wmm_param, + elems->wmm_param_len, + elems->mu_edca_param_set); + + kfree(elems); + rcu_read_unlock(); + + if (!ret) { + ieee80211_set_wmm_default(link, false, false); + continue; + } + + ieee80211_mgd_set_link_qos_params(link); + ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_QOS); + } +} + static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, struct link_sta_info *link_sta, struct cfg80211_bss *cbss, @@ -4936,7 +5193,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, * 2G/3G/4G wifi routers, reported models include the "Onda PN51T", * "Vodafone PocketWiFi 2", "ZTE MF60" and a similar T-Mobile device. 
*/ - if (!is_6ghz && + if (!ieee80211_hw_check(&local->hw, STRICT) && !is_6ghz && ((assoc_data->wmm && !elems->wmm_param) || (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT && (!elems->ht_cap_elem || !elems->ht_operation)) || @@ -5072,6 +5329,15 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, bss_vht_cap = (const void *)elem->data; } + if (ieee80211_hw_check(&local->hw, STRICT) && + (!bss_vht_cap || memcmp(bss_vht_cap, elems->vht_cap_elem, + sizeof(*bss_vht_cap)))) { + rcu_read_unlock(); + ret = false; + link_info(link, "VHT capabilities mismatch\n"); + goto out; + } + ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, elems->vht_cap_elem, bss_vht_cap, link_sta); @@ -5109,14 +5375,27 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, link_sta); bss_conf->eht_support = link_sta->pub->eht_cap.has_eht; + bss_conf->epcs_support = bss_conf->eht_support && + !!(elems->eht_cap->fixed.mac_cap_info[0] & + IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS); + + /* EPCS might be already enabled but a new added link + * does not support EPCS. This should not really happen + * in practice. 
+ */ + if (sdata->u.mgd.epcs.enabled && + !bss_conf->epcs_support) + ieee80211_epcs_teardown(sdata); } else { bss_conf->eht_support = false; + bss_conf->epcs_support = false; } } else { bss_conf->he_support = false; bss_conf->twt_requester = false; bss_conf->twt_protected = false; bss_conf->eht_support = false; + bss_conf->epcs_support = false; } bss_conf->twt_broadcast = @@ -5861,7 +6140,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, err = ieee80211_prep_channel(sdata, link, link_id, cbss, true, &link->u.mgd.conn, - assoc_data->userspace_selectors); + sdata->u.mgd.userspace_selectors); if (err) { link_info(link, "prep_channel failed\n"); goto out_err; @@ -7147,7 +7426,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, ieee80211_mgd_update_bss_param_ch_cnt(sdata, bss_conf, elems); - if (!link->u.mgd.disable_wmm_tracking && + if (!sdata->u.mgd.epcs.enabled && + !link->u.mgd.disable_wmm_tracking && ieee80211_sta_wmm_params(local, link, elems->wmm_param, elems->wmm_param_len, elems->mu_edca_param_set)) @@ -7599,13 +7879,9 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, __ieee80211_disconnect(sdata); } -static void ieee80211_teardown_ttlm_work(struct wiphy *wiphy, - struct wiphy_work *work) +void ieee80211_process_ttlm_teardown(struct ieee80211_sub_if_data *sdata) { u16 new_dormant_links; - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, - u.mgd.teardown_ttlm_work); if (!sdata->vif.neg_ttlm.valid) return; @@ -7620,6 +7896,16 @@ static void ieee80211_teardown_ttlm_work(struct wiphy *wiphy, BSS_CHANGED_MLD_VALID_LINKS); } +static void ieee80211_teardown_ttlm_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.teardown_ttlm_work); + + ieee80211_process_ttlm_teardown(sdata); +} + void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif) { 
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); @@ -9100,6 +9386,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, else memcpy(assoc_data->ap_addr, cbss->bssid, ETH_ALEN); + assoc_data->ext_mld_capa_ops = cpu_to_le16(req->ext_mld_capa_ops); + if (ifmgd->associated) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -9116,7 +9404,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, false); } - ieee80211_parse_cfg_selectors(assoc_data->userspace_selectors, + memset(sdata->u.mgd.userspace_selectors, 0, + sizeof(sdata->u.mgd.userspace_selectors)); + ieee80211_parse_cfg_selectors(sdata->u.mgd.userspace_selectors, req->supported_selectors, req->supported_selectors_len); @@ -9367,7 +9657,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, err = ieee80211_prep_channel(sdata, NULL, i, assoc_data->link[i].bss, true, &assoc_data->link[i].conn, - assoc_data->userspace_selectors); + sdata->u.mgd.userspace_selectors); if (err) { req->links[i].error = err; goto err_clear; @@ -9384,7 +9674,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, req->ap_mld_addr, true, &assoc_data->link[assoc_link_id].conn, override, - assoc_data->userspace_selectors); + sdata->u.mgd.userspace_selectors); if (err) goto err_clear; @@ -9490,7 +9780,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); - drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } @@ -9629,16 +9918,6 @@ void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_disable_rssi_reports); -static void ieee80211_ml_reconf_selectors(unsigned long *userspace_selectors) -{ - *userspace_selectors = 0; - - /* these selectors are mandatory for ML reconfiguration */ - set_bit(BSS_MEMBERSHIP_SELECTOR_SAE_H2E, userspace_selectors); - set_bit(BSS_MEMBERSHIP_SELECTOR_HE_PHY, userspace_selectors); - set_bit(BSS_MEMBERSHIP_SELECTOR_EHT_PHY, 
userspace_selectors); -} - void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { @@ -9652,7 +9931,6 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.reconf.removed_links; u16 link_mask, valid_links; unsigned int link_id; - unsigned long userspace_selectors; size_t orig_len = len; u8 i, group_key_data_len; u8 *pos; @@ -9760,7 +10038,6 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, } ieee80211_vif_set_links(sdata, valid_links, sdata->vif.dormant_links); - ieee80211_ml_reconf_selectors(&userspace_selectors); link_mask = 0; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct cfg80211_bss *cbss = add_links_data->link[link_id].bss; @@ -9806,7 +10083,7 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, link->u.mgd.conn = add_links_data->link[link_id].conn; if (ieee80211_prep_channel(sdata, link, link_id, cbss, true, &link->u.mgd.conn, - &userspace_selectors)) { + sdata->u.mgd.userspace_selectors)) { link_info(link, "mlo: reconf: prep_channel failed\n"); goto disconnect; } @@ -9854,8 +10131,11 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, done_data.len = orig_len; done_data.added_links = link_mask; - for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { done_data.links[link_id].bss = add_links_data->link[link_id].bss; + done_data.links[link_id].addr = + add_links_data->link[link_id].addr; + } cfg80211_mlo_reconf_add_done(sdata->dev, &done_data); kfree(sdata->u.mgd.reconf.add_links_data); @@ -9871,7 +10151,7 @@ disconnect: static struct sk_buff * ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgd_assoc_data *add_links_data, - u16 removed_links) + u16 removed_links, __le16 ext_mld_capa_ops) { struct ieee80211_local *local = sdata->local; 
struct ieee80211_mgmt *mgmt; @@ -9920,6 +10200,9 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, var_common_size += 2; } + if (ext_mld_capa_ops) + var_common_size += 2; + /* Add the common information length */ size += common_size + var_common_size; @@ -9946,8 +10229,8 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, size += 2 + sizeof(struct ieee80211_mle_per_sta_profile) + ETH_ALEN; - /* SSID element + WMM */ - size += 2 + sdata->vif.cfg.ssid_len + 9; + /* WMM */ + size += 9; size += ieee80211_link_common_elems_size(sdata, iftype, cbss, elems_len); } @@ -10004,6 +10287,12 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, skb_put_data(skb, &mld_capa_ops, sizeof(mld_capa_ops)); } + if (ext_mld_capa_ops) { + ml_elem->control |= + cpu_to_le16(IEEE80211_MLC_RECONF_PRES_EXT_MLD_CAPA_OP); + skb_put_data(skb, &ext_mld_capa_ops, sizeof(ext_mld_capa_ops)); + } + if (sdata->u.mgd.flags & IEEE80211_STA_ENABLE_RRM) capab |= WLAN_CAPABILITY_RADIO_MEASURE; @@ -10053,11 +10342,6 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, capab_pos = skb_put(skb, 2); - skb_put_u8(skb, WLAN_EID_SSID); - skb_put_u8(skb, sdata->vif.cfg.ssid_len); - skb_put_data(skb, sdata->vif.cfg.ssid, - sdata->vif.cfg.ssid_len); - extra_used = ieee80211_add_link_elems(sdata, skb, &capab, NULL, add_links_data->link[link_id].elems, @@ -10097,8 +10381,7 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, } int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, - struct cfg80211_assoc_link *add_links, - u16 rem_links) + struct cfg80211_ml_reconf_req *req) { struct ieee80211_local *local = sdata->local; struct ieee80211_mgd_assoc_data *data = NULL; @@ -10118,9 +10401,8 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, return -EBUSY; added_links = 0; - for (link_id = 0; add_links && link_id < IEEE80211_MLD_MAX_NUM_LINKS; - link_id++) { - if (!add_links[link_id].bss) + for 
(link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + if (!req->add_links[link_id].bss) continue; added_links |= BIT(link_id); @@ -10130,9 +10412,6 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!sta)) return -ENOLINK; - if (rem_links & BIT(sta->sta.deflink.link_id)) - return -EINVAL; - /* Adding links to the set of valid link is done only after a successful * ML reconfiguration frame exchange. Here prepare the data for the ML * reconfiguration frame construction and allocate the required @@ -10140,18 +10419,20 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, */ if (added_links) { bool uapsd_supported; - unsigned long userspace_selectors; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; + data->assoc_link_id = -1; + data->wmm = true; + uapsd_supported = true; - ieee80211_ml_reconf_selectors(&userspace_selectors); for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_supported_band *sband; - struct cfg80211_bss *link_cbss = add_links[link_id].bss; + struct cfg80211_bss *link_cbss = + req->add_links[link_id].bss; struct ieee80211_bss *bss; if (!link_cbss) @@ -10181,11 +10462,11 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, data->link[link_id].bss = link_cbss; data->link[link_id].disabled = - add_links[link_id].disabled; + req->add_links[link_id].disabled; data->link[link_id].elems = - (u8 *)add_links[link_id].elems; + (u8 *)req->add_links[link_id].elems; data->link[link_id].elems_len = - add_links[link_id].elems_len; + req->add_links[link_id].elems_len; if (!bss->uapsd_supported) uapsd_supported = false; @@ -10204,12 +10485,11 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, } } - /* Require U-APSD support to be similar to the current valid - * links - */ - if (uapsd_supported != - !!(sdata->u.mgd.flags & IEEE80211_STA_UAPSD_ENABLED)) { + /* Require U-APSD support if we enabled it */ + 
if (sdata->u.mgd.flags & IEEE80211_STA_UAPSD_ENABLED && + !uapsd_supported) { err = -EINVAL; + sdata_info(sdata, "U-APSD on but not available on (all) new links\n"); goto err_free; } @@ -10223,7 +10503,7 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, data->link[link_id].bss, true, &data->link[link_id].conn, - &userspace_selectors); + sdata->u.mgd.userspace_selectors); if (err) goto err_free; } @@ -10235,10 +10515,11 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, * Section 35.3.6.4 in Draft P802.11be_D7.0 the AP MLD should accept the * link removal request. */ - if (rem_links) { - u16 new_active_links = sdata->vif.active_links & ~rem_links; + if (req->rem_links) { + u16 new_active_links = + sdata->vif.active_links & ~req->rem_links; - new_valid_links = sdata->vif.valid_links & ~rem_links; + new_valid_links = sdata->vif.valid_links & ~req->rem_links; /* Should not be left with no valid links to perform the * ML reconfiguration @@ -10273,14 +10554,16 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, * is expected to send the ML reconfiguration response frame on the link * on which the request was received. 
*/ - skb = ieee80211_build_ml_reconf_req(sdata, data, rem_links); + skb = ieee80211_build_ml_reconf_req(sdata, data, req->rem_links, + cpu_to_le16(req->ext_mld_capa_ops)); if (!skb) { err = -ENOMEM; goto err_free; } - if (rem_links) { - u16 new_dormant_links = sdata->vif.dormant_links & ~rem_links; + if (req->rem_links) { + u16 new_dormant_links = + sdata->vif.dormant_links & ~req->rem_links; err = ieee80211_vif_set_links(sdata, new_valid_links, new_dormant_links); @@ -10293,7 +10576,7 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { - if (!(rem_links & BIT(link_id))) + if (!(req->rem_links & BIT(link_id))) continue; ieee80211_sta_remove_link(sta, link_id); @@ -10302,17 +10585,17 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, /* notify the driver and upper layers */ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_MLD_VALID_LINKS); - cfg80211_links_removed(sdata->dev, rem_links); + cfg80211_links_removed(sdata->dev, req->rem_links); } sdata_info(sdata, "mlo: reconf: adding=0x%x, removed=0x%x\n", - added_links, rem_links); + added_links, req->rem_links); ieee80211_tx_skb(sdata, skb); sdata->u.mgd.reconf.added_links = added_links; sdata->u.mgd.reconf.add_links_data = data; - sdata->u.mgd.reconf.removed_links = rem_links; + sdata->u.mgd.reconf.removed_links = req->rem_links; wiphy_delayed_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.reconf.wk, IEEE80211_ASSOC_TIMEOUT_SHORT); @@ -10322,3 +10605,198 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, kfree(data); return err; } + +static bool ieee80211_mgd_epcs_supp(struct ieee80211_sub_if_data *sdata) +{ + unsigned long valid_links = sdata->vif.valid_links; + u8 link_id; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (!ieee80211_vif_is_mld(&sdata->vif)) + return false; + + for_each_set_bit(link_id, &valid_links, IEEE80211_MLD_MAX_NUM_LINKS) { + struct 
ieee80211_bss_conf *bss_conf = + sdata_dereference(sdata->vif.link_conf[link_id], sdata); + + if (WARN_ON(!bss_conf) || !bss_conf->epcs_support) + return false; + } + + return true; +} + +int ieee80211_mgd_set_epcs(struct ieee80211_sub_if_data *sdata, bool enable) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_mgmt *mgmt; + struct sk_buff *skb; + int frame_len = offsetofend(struct ieee80211_mgmt, + u.action.u.epcs) + (enable ? 1 : 0); + + if (!ieee80211_mgd_epcs_supp(sdata)) + return -EINVAL; + + if (sdata->u.mgd.epcs.enabled == enable && + !sdata->u.mgd.epcs.dialog_token) + return 0; + + /* Do not allow enabling EPCS if the AP didn't respond yet. + * However, allow disabling EPCS in such a case. + */ + if (sdata->u.mgd.epcs.dialog_token && enable) + return -EALREADY; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + frame_len); + if (!skb) + return -ENOBUFS; + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = skb_put_zero(skb, frame_len); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); + + mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; + if (enable) { + u8 *pos = mgmt->u.action.u.epcs.variable; + + mgmt->u.action.u.epcs.action_code = + WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_REQ; + + *pos = ++sdata->u.mgd.dialog_token_alloc; + sdata->u.mgd.epcs.dialog_token = *pos; + } else { + mgmt->u.action.u.epcs.action_code = + WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN; + + ieee80211_epcs_teardown(sdata); + ieee80211_epcs_changed(sdata, false); + } + + ieee80211_tx_skb(sdata, skb); + return 0; +} + +static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata, + struct ieee802_11_elems *elems) +{ + const struct element *sub; + size_t scratch_len = elems->ml_epcs_len; + u8 *scratch __free(kfree) = kzalloc(scratch_len, GFP_KERNEL); + + 
lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (!ieee80211_vif_is_mld(&sdata->vif) || !elems->ml_epcs) + return; + + if (WARN_ON(!scratch)) + return; + + /* Directly parse the sub elements as the common information doesn't + * hold any useful information. + */ + for_each_mle_subelement(sub, (const u8 *)elems->ml_epcs, + elems->ml_epcs_len) { + struct ieee80211_link_data *link; + struct ieee802_11_elems *link_elems __free(kfree); + u8 *pos = (void *)sub->data; + u16 control; + ssize_t len; + u8 link_id; + + if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE) + continue; + + if (sub->datalen < sizeof(control)) + break; + + control = get_unaligned_le16(pos); + link_id = control & IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID; + + link = sdata_dereference(sdata->link[link_id], sdata); + if (!link) + continue; + + len = cfg80211_defragment_element(sub, (u8 *)elems->ml_epcs, + elems->ml_epcs_len, + scratch, scratch_len, + IEEE80211_MLE_SUBELEM_FRAGMENT); + if (len < (ssize_t)sizeof(control)) + continue; + + pos = scratch + sizeof(control); + len -= sizeof(control); + + link_elems = ieee802_11_parse_elems(pos, len, false, NULL); + if (!link_elems) + continue; + + if (ieee80211_sta_wmm_params(sdata->local, link, + link_elems->wmm_param, + link_elems->wmm_param_len, + link_elems->mu_edca_param_set)) + ieee80211_link_info_change_notify(sdata, link, + BSS_CHANGED_QOS); + } +} + +void ieee80211_process_epcs_ena_resp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee802_11_elems *elems __free(kfree) = NULL; + size_t ies_len; + u16 status_code; + u8 *pos, dialog_token; + + if (!ieee80211_mgd_epcs_supp(sdata)) + return; + + /* Handle dialog token and status code */ + pos = mgmt->u.action.u.epcs.variable; + dialog_token = *pos; + status_code = get_unaligned_le16(pos + 1); + + /* An EPCS enable response with dialog token == 0 is an unsolicited + * notification from the AP MLD. 
In such a case, EPCS should already be + * enabled and status must be success + */ + if (!dialog_token && + (!sdata->u.mgd.epcs.enabled || + status_code != WLAN_STATUS_SUCCESS)) + return; + + if (sdata->u.mgd.epcs.dialog_token != dialog_token) + return; + + sdata->u.mgd.epcs.dialog_token = 0; + + if (status_code != WLAN_STATUS_SUCCESS) + return; + + pos += IEEE80211_EPCS_ENA_RESP_BODY_LEN; + ies_len = len - offsetof(struct ieee80211_mgmt, + u.action.u.epcs.variable) - + IEEE80211_EPCS_ENA_RESP_BODY_LEN; + + elems = ieee802_11_parse_elems(pos, ies_len, true, NULL); + if (!elems) + return; + + ieee80211_ml_epcs(sdata, elems); + ieee80211_epcs_changed(sdata, true); +} + +void ieee80211_process_epcs_teardown(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) +{ + if (!ieee80211_vif_is_mld(&sdata->vif) || + !sdata->u.mgd.epcs.enabled) + return; + + ieee80211_epcs_teardown(sdata); + ieee80211_epcs_changed(sdata, false); +} diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0659ec892ec6..f7f89cd1b7d7 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1045,14 +1045,14 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(hdr->addr1)) { if (ieee80211_has_tods(hdr->frame_control) || !ieee80211_has_fromds(hdr->frame_control)) - return RX_DROP_MONITOR; + return RX_DROP; if (ether_addr_equal(hdr->addr3, dev_addr)) - return RX_DROP_MONITOR; + return RX_DROP; } else { if (!ieee80211_has_a4(hdr->frame_control)) - return RX_DROP_MONITOR; + return RX_DROP; if (ether_addr_equal(hdr->addr4, dev_addr)) - return RX_DROP_MONITOR; + return RX_DROP; } } @@ -1064,20 +1064,20 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) struct ieee80211_mgmt *mgmt; if (!ieee80211_is_mgmt(hdr->frame_control)) - return RX_DROP_MONITOR; + return RX_DROP; if (ieee80211_is_action(hdr->frame_control)) { u8 category; /* make sure category field is present */ if (rx->skb->len < 
IEEE80211_MIN_ACTION_SIZE) - return RX_DROP_MONITOR; + return RX_DROP; mgmt = (struct ieee80211_mgmt *)hdr; category = mgmt->u.action.category; if (category != WLAN_CATEGORY_MESH_ACTION && category != WLAN_CATEGORY_SELF_PROTECTED) - return RX_DROP_MONITOR; + return RX_DROP; return RX_CONTINUE; } @@ -1087,7 +1087,7 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) ieee80211_is_auth(hdr->frame_control)) return RX_CONTINUE; - return RX_DROP_MONITOR; + return RX_DROP; } return RX_CONTINUE; @@ -1513,7 +1513,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->skb->len < hdrlen + 8) - return RX_DROP_MONITOR; + return RX_DROP; skb_copy_bits(rx->skb, hdrlen + 6, &ethertype, 2); if (ethertype == rx->sdata->control_port_protocol) @@ -1526,7 +1526,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) GFP_ATOMIC)) return RX_DROP_U_SPURIOUS; - return RX_DROP_MONITOR; + return RX_DROP; } return RX_CONTINUE; @@ -1862,7 +1862,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) cfg80211_rx_unexpected_4addr_frame( rx->sdata->dev, sta->sta.addr, GFP_ATOMIC); - return RX_DROP_M_UNEXPECTED_4ADDR_FRAME; + return RX_DROP_U_UNEXPECTED_4ADDR_FRAME; } /* * Update counter and free packet here to avoid @@ -1997,7 +1997,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, skb->data, skb->len); - return RX_DROP_M_BAD_BCN_KEYIDX; + return RX_DROP_U_BAD_BCN_KEYIDX; } rx->key = ieee80211_rx_get_bigtk(rx, mmie_keyidx); @@ -2011,11 +2011,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (mmie_keyidx < NUM_DEFAULT_KEYS || mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) - return RX_DROP_M_BAD_MGMT_KEYIDX; /* unexpected BIP keyidx */ + return RX_DROP_U_BAD_MGMT_KEYIDX; /* unexpected BIP keyidx */ if (rx->link_sta) { if (ieee80211_is_group_privacy_action(skb) && test_sta_flag(rx->sta, WLAN_STA_MFP)) - return RX_DROP_MONITOR; + return
RX_DROP; rx->key = rcu_dereference(rx->link_sta->gtk[mmie_keyidx]); } @@ -2100,11 +2100,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (rx->key) { if (unlikely(rx->key->flags & KEY_FLAG_TAINTED)) - return RX_DROP_MONITOR; + return RX_DROP; /* TODO: add threshold stuff again */ } else { - return RX_DROP_MONITOR; + return RX_DROP; } switch (rx->key->conf.cipher) { @@ -2278,7 +2278,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) goto out; if (is_multicast_ether_addr(hdr->addr1)) - return RX_DROP_MONITOR; + return RX_DROP; I802_DEBUG_INC(rx->local->rx_handlers_fragments); @@ -2333,7 +2333,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) rx->seqno_idx, hdr); if (!entry) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); - return RX_DROP_MONITOR; + return RX_DROP; } /* "The receiver shall discard MSDUs and MMPDUs whose constituent @@ -2855,25 +2855,25 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta return RX_CONTINUE; if (!pskb_may_pull(skb, sizeof(*eth) + 6)) - return RX_DROP_MONITOR; + return RX_DROP; mesh_hdr = (struct ieee80211s_hdr *)(skb->data + sizeof(*eth)); mesh_hdrlen = ieee80211_get_mesh_hdrlen(mesh_hdr); if (!pskb_may_pull(skb, sizeof(*eth) + mesh_hdrlen)) - return RX_DROP_MONITOR; + return RX_DROP; eth = (struct ethhdr *)skb->data; multicast = is_multicast_ether_addr(eth->h_dest); mesh_hdr = (struct ieee80211s_hdr *)(eth + 1); if (!mesh_hdr->ttl) - return RX_DROP_MONITOR; + return RX_DROP; /* frame is in RMC, don't forward */ if (is_multicast_ether_addr(eth->h_dest) && mesh_rmc_check(sdata, eth->h_source, mesh_hdr)) - return RX_DROP_MONITOR; + return RX_DROP; /* forward packet */ if (sdata->crypto_tx_tailroom_needed_cnt) @@ -2890,7 +2890,7 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta /* has_a4 already checked in ieee80211_rx_mesh_check */ proxied_addr = mesh_hdr->eaddr2; else - return RX_DROP_MONITOR; + return RX_DROP; rcu_read_lock(); mppath 
= mpp_path_lookup(sdata, proxied_addr); @@ -2922,14 +2922,14 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta goto rx_accept; IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); - return RX_DROP_MONITOR; + return RX_DROP; } if (!ifmsh->mshcfg.dot11MeshForwarding) { if (is_multicast_ether_addr(eth->h_dest)) goto rx_accept; - return RX_DROP_MONITOR; + return RX_DROP; } skb_set_queue_mapping(skb, ieee802_1d_to_ac[skb->priority]); @@ -3122,7 +3122,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (unlikely(!ieee80211_is_data_present(fc))) - return RX_DROP_MONITOR; + return RX_DROP; if (unlikely(ieee80211_has_a4(hdr->frame_control))) { switch (rx->sdata->vif.type) { @@ -3179,19 +3179,16 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) - return RX_DROP_MONITOR; + return RX_DROP; - /* - * Send unexpected-4addr-frame event to hostapd. For older versions, - * also drop the frame to cooked monitor interfaces. 
- */ + /* Send unexpected-4addr-frame event to hostapd */ if (ieee80211_has_a4(hdr->frame_control) && sdata->vif.type == NL80211_IFTYPE_AP) { if (rx->sta && !test_and_set_sta_flag(rx->sta, WLAN_STA_4ADDR_EVENT)) cfg80211_rx_unexpected_4addr_frame( rx->sdata->dev, rx->sta->sta.addr, GFP_ATOMIC); - return RX_DROP_MONITOR; + return RX_DROP; } res = __ieee80211_data_to_8023(rx, &port_control); @@ -3203,7 +3200,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) return res; if (!ieee80211_frame_allowed(rx, fc)) - return RX_DROP_MONITOR; + return RX_DROP; /* directly handle TDLS channel switch requests/responses */ if (unlikely(((struct ethhdr *)rx->skb->data)->h_proto == @@ -3268,11 +3265,11 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) }; if (!rx->sta) - return RX_DROP_MONITOR; + return RX_DROP; if (skb_copy_bits(skb, offsetof(struct ieee80211_bar, control), &bar_data, sizeof(bar_data))) - return RX_DROP_MONITOR; + return RX_DROP; tid = le16_to_cpu(bar_data.control) >> 12; @@ -3284,7 +3281,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) - return RX_DROP_MONITOR; + return RX_DROP; start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; event.u.ba.tid = tid; @@ -3308,12 +3305,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_QUEUED; } - /* - * After this point, we only want management frames, - * so we can drop all remaining control frames to - * cooked monitor interfaces. - */ - return RX_DROP_MONITOR; + return RX_DROP; } static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, @@ -3422,10 +3414,10 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) * and unknown (reserved) frames are useless. 
*/ if (rx->skb->len < 24) - return RX_DROP_MONITOR; + return RX_DROP; if (!ieee80211_is_mgmt(mgmt->frame_control)) - return RX_DROP_MONITOR; + return RX_DROP; /* drop too small action frames */ if (ieee80211_is_action(mgmt->frame_control) && @@ -3819,6 +3811,14 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) u.action.u.ttlm_res)) goto invalid; goto queue; + case WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN: + if (sdata->vif.type != NL80211_IFTYPE_STATION) + break; + + if (len < offsetofend(typeof(*mgmt), + u.action.u.ttlm_tear_down)) + goto invalid; + goto queue; case WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_RESP: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; @@ -3831,6 +3831,23 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) u.action.u.ml_reconf_resp) + 3) goto invalid; goto queue; + case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_RESP: + if (sdata->vif.type != NL80211_IFTYPE_STATION) + break; + + if (len < offsetofend(typeof(*mgmt), + u.action.u.epcs) + + IEEE80211_EPCS_ENA_RESP_BODY_LEN) + goto invalid; + goto queue; + case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN: + if (sdata->vif.type != NL80211_IFTYPE_STATION) + break; + + if (len < offsetofend(typeof(*mgmt), + u.action.u.epcs)) + goto invalid; + goto queue; default: break; } @@ -3951,17 +3968,16 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) * ones. For all other modes we will return them to the sender, * setting the 0x80 bit in the action category, as required by * 802.11-2012 9.24.4. - * Newer versions of hostapd shall also use the management frame - * registration mechanisms, but older ones still use cooked - * monitor interfaces so push all frames there. + * Newer versions of hostapd use the management frame registration + * mechanisms and old cooked monitor interface is no longer supported. 
*/ if (!(status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM) && (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) - return RX_DROP_MONITOR; + return RX_DROP; if (is_multicast_ether_addr(mgmt->da)) - return RX_DROP_MONITOR; + return RX_DROP; /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) @@ -4006,7 +4022,7 @@ ieee80211_rx_h_ext(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (sdata->vif.type != NL80211_IFTYPE_STATION) - return RX_DROP_MONITOR; + return RX_DROP; /* for now only beacons are ext, so queue them */ ieee80211_queue_skb_to_iface(sdata, rx->link_id, rx->sta, rx->skb); @@ -4027,7 +4043,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_ADHOC && sdata->vif.type != NL80211_IFTYPE_OCB && sdata->vif.type != NL80211_IFTYPE_STATION) - return RX_DROP_MONITOR; + return RX_DROP; switch (stype) { case cpu_to_le16(IEEE80211_STYPE_AUTH): @@ -4038,32 +4054,32 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) case cpu_to_le16(IEEE80211_STYPE_DEAUTH): if (is_multicast_ether_addr(mgmt->da) && !is_broadcast_ether_addr(mgmt->da)) - return RX_DROP_MONITOR; + return RX_DROP; /* process only for station/IBSS */ if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_ADHOC) - return RX_DROP_MONITOR; + return RX_DROP; break; case cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP): case cpu_to_le16(IEEE80211_STYPE_REASSOC_RESP): case cpu_to_le16(IEEE80211_STYPE_DISASSOC): if (is_multicast_ether_addr(mgmt->da) && !is_broadcast_ether_addr(mgmt->da)) - return RX_DROP_MONITOR; + return RX_DROP; /* process only for station */ if (sdata->vif.type != NL80211_IFTYPE_STATION) - return RX_DROP_MONITOR; + return RX_DROP; break; case cpu_to_le16(IEEE80211_STYPE_PROBE_REQ): /* process only for ibss and mesh */ if (sdata->vif.type != NL80211_IFTYPE_ADHOC && sdata->vif.type != NL80211_IFTYPE_MESH_POINT) - return RX_DROP_MONITOR; + return RX_DROP; break; default: - 
return RX_DROP_MONITOR; + return RX_DROP; } ieee80211_queue_skb_to_iface(sdata, rx->link_id, rx->sta, rx->skb); @@ -4071,82 +4087,9 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) return RX_QUEUED; } -static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, - struct ieee80211_rate *rate, - ieee80211_rx_result reason) -{ - struct ieee80211_sub_if_data *sdata; - struct ieee80211_local *local = rx->local; - struct sk_buff *skb = rx->skb, *skb2; - struct net_device *prev_dev = NULL; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - int needed_headroom; - - /* - * If cooked monitor has been processed already, then - * don't do it again. If not, set the flag. - */ - if (rx->flags & IEEE80211_RX_CMNTR) - goto out_free_skb; - rx->flags |= IEEE80211_RX_CMNTR; - - /* If there are no cooked monitor interfaces, just free the SKB */ - if (!local->cooked_mntrs) - goto out_free_skb; - - /* room for the radiotap header based on driver features */ - needed_headroom = ieee80211_rx_radiotap_hdrlen(local, status, skb); - - if (skb_headroom(skb) < needed_headroom && - pskb_expand_head(skb, needed_headroom, 0, GFP_ATOMIC)) - goto out_free_skb; - - /* prepend radiotap information */ - ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom, - false); - - skb_reset_mac_header(skb); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_802_2); - - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - - if (sdata->vif.type != NL80211_IFTYPE_MONITOR || - !(sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)) - continue; - - if (prev_dev) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2) { - skb2->dev = prev_dev; - netif_receive_skb(skb2); - } - } - - prev_dev = sdata->dev; - dev_sw_netstats_rx_add(sdata->dev, skb->len); - } - - if (prev_dev) { - skb->dev = prev_dev; - netif_receive_skb(skb); - return; - } - - out_free_skb: - 
kfree_skb_reason(skb, (__force u32)reason); -} - static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, ieee80211_rx_result res) { - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - struct ieee80211_supported_band *sband; - struct ieee80211_rate *rate = NULL; - if (res == RX_QUEUED) { I802_DEBUG_INC(rx->sdata->local->rx_handlers_queued); return; @@ -4158,23 +4101,13 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, rx->link_sta->rx_stats.dropped++; } - if (u32_get_bits((__force u32)res, SKB_DROP_REASON_SUBSYS_MASK) == - SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE) { - kfree_skb_reason(rx->skb, (__force u32)res); - return; - } - - sband = rx->local->hw.wiphy->bands[status->band]; - if (status->encoding == RX_ENC_LEGACY) - rate = &sband->bitrates[status->rate_idx]; - - ieee80211_rx_cooked_monitor(rx, rate, res); + kfree_skb_reason(rx->skb, (__force u32)res); } static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) { - ieee80211_rx_result res = RX_DROP_MONITOR; + ieee80211_rx_result res = RX_DROP; struct sk_buff *skb; #define CALL_RXH(rxh) \ @@ -4238,7 +4171,7 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) { struct sk_buff_head reorder_release; - ieee80211_rx_result res = RX_DROP_MONITOR; + ieee80211_rx_result res = RX_DROP; __skb_queue_head_init(&reorder_release); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index caa3d0236b5e..30cdc783999d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2598,6 +2598,39 @@ static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) return value; } +#ifdef CONFIG_MAC80211_MESH +static void sta_set_mesh_sinfo(struct sta_info *sta, + struct station_info *sinfo) +{ + struct ieee80211_local *local = sta->sdata->local; + + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_LLID) | + 
BIT_ULL(NL80211_STA_INFO_PLID) | + BIT_ULL(NL80211_STA_INFO_PLINK_STATE) | + BIT_ULL(NL80211_STA_INFO_LOCAL_PM) | + BIT_ULL(NL80211_STA_INFO_PEER_PM) | + BIT_ULL(NL80211_STA_INFO_NONPEER_PM) | + BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE) | + BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_AS); + + sinfo->llid = sta->mesh->llid; + sinfo->plid = sta->mesh->plid; + sinfo->plink_state = sta->mesh->plink_state; + if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_T_OFFSET); + sinfo->t_offset = sta->mesh->t_offset; + } + sinfo->local_pm = sta->mesh->local_pm; + sinfo->peer_pm = sta->mesh->peer_pm; + sinfo->nonpeer_pm = sta->mesh->nonpeer_pm; + sinfo->connected_to_gate = sta->mesh->connected_to_gate; + sinfo->connected_to_as = sta->mesh->connected_to_as; + + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_LINK_METRIC); + sinfo->airtime_link_metric = airtime_link_metric_get(local, sta); +} +#endif + void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, bool tidstats) { @@ -2782,31 +2815,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sta_set_tidstats(sta, &sinfo->pertid[i], i); } - if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - sinfo->filled |= BIT_ULL(NL80211_STA_INFO_LLID) | - BIT_ULL(NL80211_STA_INFO_PLID) | - BIT_ULL(NL80211_STA_INFO_PLINK_STATE) | - BIT_ULL(NL80211_STA_INFO_LOCAL_PM) | - BIT_ULL(NL80211_STA_INFO_PEER_PM) | - BIT_ULL(NL80211_STA_INFO_NONPEER_PM) | - BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE) | - BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_AS); - - sinfo->llid = sta->mesh->llid; - sinfo->plid = sta->mesh->plid; - sinfo->plink_state = sta->mesh->plink_state; - if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - sinfo->filled |= BIT_ULL(NL80211_STA_INFO_T_OFFSET); - sinfo->t_offset = sta->mesh->t_offset; - } - sinfo->local_pm = sta->mesh->local_pm; - sinfo->peer_pm = sta->mesh->peer_pm; - sinfo->nonpeer_pm = sta->mesh->nonpeer_pm; - sinfo->connected_to_gate = 
sta->mesh->connected_to_gate; - sinfo->connected_to_as = sta->mesh->connected_to_as; + if (ieee80211_vif_is_mesh(&sdata->vif)) + sta_set_mesh_sinfo(sta, sinfo); #endif - } sinfo->bss_param.flags = 0; if (sdata->vif.bss_conf.use_cts_prot) @@ -2862,12 +2874,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG); } - - if (ieee80211_vif_is_mesh(&sdata->vif)) { - sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_LINK_METRIC); - sinfo->airtime_link_metric = - airtime_link_metric_get(local, sta); - } } u32 sta_get_expected_throughput(struct sta_info *sta) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 5f28f3633fa0..b17b3cc7fb90 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -895,8 +895,7 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, } void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, - int retry_count, bool send_to_cooked, - struct ieee80211_tx_status *status) + int retry_count, struct ieee80211_tx_status *status) { struct sk_buff *skb2; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -930,10 +929,6 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, if (sdata->u.mntr.flags & MONITOR_FLAG_SKIP_TX) continue; - if ((sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) && - !send_to_cooked) - continue; - if (prev_dev) { skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) { @@ -964,7 +959,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, struct ieee80211_tx_info *info = status->info; struct sta_info *sta; __le16 fc; - bool send_to_cooked; bool acked; bool noack_success; struct ieee80211_bar *bar; @@ -1091,28 +1085,10 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, ieee80211_report_used_skb(local, skb, false, status->ack_hwtstamp); - /* this was a transmitted frame, but now we want to reuse it */ - skb_orphan(skb); - - /* Need to make a copy before skb->cb gets 
cleared */ - send_to_cooked = !!(info->flags & IEEE80211_TX_CTL_INJECTED) || - !(ieee80211_is_data(fc)); - - /* - * This is a bit racy but we can avoid a lot of work - * with this test... - */ - if (!local->tx_mntrs && (!send_to_cooked || !local->cooked_mntrs)) { - if (status->free_list) - list_add_tail(&skb->list, status->free_list); - else - dev_kfree_skb(skb); - return; - } - - /* send to monitor interfaces */ - ieee80211_tx_monitor(local, skb, retry_count, - send_to_cooked, status); + if (status->free_list) + list_add_tail(&skb->list, status->free_list); + else + dev_kfree_skb(skb); } void ieee80211_tx_status_skb(struct ieee80211_hw *hw, struct sk_buff *skb) diff --git a/net/mac80211/tests/Makefile b/net/mac80211/tests/Makefile index 0f5336bc7314..3b0c08356fc5 100644 --- a/net/mac80211/tests/Makefile +++ b/net/mac80211/tests/Makefile @@ -1,3 +1,3 @@ -mac80211-tests-y += module.o util.o elems.o mfp.o tpe.o +mac80211-tests-y += module.o util.o elems.o mfp.o tpe.o chan-mode.o obj-$(CONFIG_MAC80211_KUNIT_TEST) += mac80211-tests.o diff --git a/net/mac80211/tests/chan-mode.c b/net/mac80211/tests/chan-mode.c new file mode 100644 index 000000000000..96c7b3ab2744 --- /dev/null +++ b/net/mac80211/tests/chan-mode.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for channel mode functions + * + * Copyright (C) 2024 Intel Corporation + */ +#include <net/cfg80211.h> +#include <kunit/test.h> + +#include "util.h" + +MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); + +static const struct determine_chan_mode_case { + const char *desc; + u8 extra_supp_rate; + enum ieee80211_conn_mode conn_mode; + enum ieee80211_conn_mode expected_mode; + bool strict; + u8 userspace_selector; + struct ieee80211_ht_cap ht_capa_mask; + struct ieee80211_vht_cap vht_capa; + struct ieee80211_vht_cap vht_capa_mask; + u8 vht_basic_mcs_1_4_set:1, + vht_basic_mcs_5_8_set:1, + he_basic_mcs_1_4_set:1, + he_basic_mcs_5_8_set:1; + u8 vht_basic_mcs_1_4, vht_basic_mcs_5_8; + u8 
he_basic_mcs_1_4, he_basic_mcs_5_8; + u8 eht_mcs7_min_nss; + int error; +} determine_chan_mode_cases[] = { + { + .desc = "Normal case, EHT is working", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_EHT, + }, { + .desc = "Requiring EHT support is fine", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_EHT, + .extra_supp_rate = 0x80 | BSS_MEMBERSHIP_SELECTOR_EHT_PHY, + }, { + .desc = "Lowering the mode limits us", + .conn_mode = IEEE80211_CONN_MODE_VHT, + .expected_mode = IEEE80211_CONN_MODE_VHT, + }, { + .desc = "Requesting a basic rate/selector that we do not support", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .extra_supp_rate = 0x80 | (BSS_MEMBERSHIP_SELECTOR_MIN - 1), + .error = EINVAL, + }, { + .desc = "As before, but userspace says it is taking care of it", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .userspace_selector = BSS_MEMBERSHIP_SELECTOR_MIN - 1, + .extra_supp_rate = 0x80 | (BSS_MEMBERSHIP_SELECTOR_MIN - 1), + .expected_mode = IEEE80211_CONN_MODE_EHT, + }, { + .desc = "Masking out a supported rate in HT capabilities", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_LEGACY, + .ht_capa_mask = { + .mcs.rx_mask[0] = 0xf7, + }, + }, { + .desc = "Masking out a RX rate in VHT capabilities", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_HT, + /* Only one RX stream at MCS 0-7 */ + .vht_capa = { + .supp_mcs.rx_mcs_map = + cpu_to_le16(IEEE80211_VHT_MCS_SUPPORT_0_7), + }, + .vht_capa_mask = { + .supp_mcs.rx_mcs_map = cpu_to_le16(0xffff), + }, + .strict = true, + }, { + .desc = "Masking out a TX rate in VHT capabilities", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_HT, + /* Only one TX stream at MCS 0-7 */ + .vht_capa = { + .supp_mcs.tx_mcs_map = + cpu_to_le16(IEEE80211_VHT_MCS_SUPPORT_0_7), + }, + .vht_capa_mask = { + .supp_mcs.tx_mcs_map = cpu_to_le16(0xffff), + }, + .strict = true, + }, { + .desc = "AP has 
higher VHT requirement than client", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_HT, + .vht_basic_mcs_5_8_set = 1, + .vht_basic_mcs_5_8 = 0xFE, /* require 5th stream */ + .strict = true, + }, { + .desc = "all zero VHT basic rates are ignored (many APs broken)", + .conn_mode = IEEE80211_CONN_MODE_VHT, + .expected_mode = IEEE80211_CONN_MODE_VHT, + .vht_basic_mcs_1_4_set = 1, + .vht_basic_mcs_5_8_set = 1, + }, { + .desc = "AP requires 3 HE streams but client only has two", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_VHT, + .he_basic_mcs_1_4 = 0b11001010, + .he_basic_mcs_1_4_set = 1, + }, { + .desc = "all zero HE basic rates are ignored (iPhone workaround)", + .conn_mode = IEEE80211_CONN_MODE_HE, + .expected_mode = IEEE80211_CONN_MODE_HE, + .he_basic_mcs_1_4_set = 1, + .he_basic_mcs_5_8_set = 1, + }, { + .desc = "AP requires too many RX streams with EHT MCS 7", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_HE, + .eht_mcs7_min_nss = 0x15, + }, { + .desc = "AP requires too many TX streams with EHT MCS 7", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_HE, + .eht_mcs7_min_nss = 0x51, + }, { + .desc = "AP requires too many RX streams with EHT MCS 7 and EHT is required", + .extra_supp_rate = 0x80 | BSS_MEMBERSHIP_SELECTOR_EHT_PHY, + .conn_mode = IEEE80211_CONN_MODE_EHT, + .eht_mcs7_min_nss = 0x15, + .error = EINVAL, + } +}; +KUNIT_ARRAY_PARAM_DESC(determine_chan_mode, determine_chan_mode_cases, desc) + +static void test_determine_chan_mode(struct kunit *test) +{ + const struct determine_chan_mode_case *params = test->param_value; + struct t_sdata *t_sdata = T_SDATA(test); + struct ieee80211_conn_settings conn = { + .mode = params->conn_mode, + .bw_limit = IEEE80211_CONN_BW_LIMIT_20, + }; + struct cfg80211_bss cbss = { + .channel = &t_sdata->band_5ghz.channels[0], + }; + unsigned long userspace_selectors[BITS_TO_LONGS(128)] = {}; + u8 
bss_ies[] = { + /* Supported Rates */ + WLAN_EID_SUPP_RATES, 0x08, + 0x82, 0x84, 0x8b, 0x96, 0xc, 0x12, 0x18, 0x24, + /* Extended Supported Rates */ + WLAN_EID_EXT_SUPP_RATES, 0x05, + 0x30, 0x48, 0x60, 0x6c, params->extra_supp_rate, + /* HT Capabilities */ + WLAN_EID_HT_CAPABILITY, 0x1a, + 0x0c, 0x00, 0x1b, 0xff, 0xff, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + /* HT Information (0xff for 1 stream) */ + WLAN_EID_HT_OPERATION, 0x16, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* VHT Capabilities */ + WLAN_EID_VHT_CAPABILITY, 0xc, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, + 0xff, 0xff, 0x00, 0x00, + /* VHT Operation */ + WLAN_EID_VHT_OPERATION, 0x05, + 0x00, 0x00, 0x00, + params->vht_basic_mcs_1_4_set ? + params->vht_basic_mcs_1_4 : + le16_get_bits(t_sdata->band_5ghz.vht_cap.vht_mcs.rx_mcs_map, 0xff), + params->vht_basic_mcs_5_8_set ? + params->vht_basic_mcs_5_8 : + le16_get_bits(t_sdata->band_5ghz.vht_cap.vht_mcs.rx_mcs_map, 0xff00), + /* HE Capabilities */ + WLAN_EID_EXTENSION, 0x16, WLAN_EID_EXT_HE_CAPABILITY, + 0x01, 0x78, 0xc8, 0x1a, 0x40, 0x00, 0x00, 0xbf, + 0xce, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xfa, 0xff, 0xfa, 0xff, + /* HE Operation (permit overriding values) */ + WLAN_EID_EXTENSION, 0x07, WLAN_EID_EXT_HE_OPERATION, + 0xf0, 0x3f, 0x00, 0xb0, + params->he_basic_mcs_1_4_set ? params->he_basic_mcs_1_4 : 0xfc, + params->he_basic_mcs_5_8_set ? params->he_basic_mcs_5_8 : 0xff, + /* EHT Capabilities */ + WLAN_EID_EXTENSION, 0x12, WLAN_EID_EXT_EHT_CAPABILITY, + 0x07, 0x00, 0x1c, 0x00, 0x00, 0xfe, 0xff, 0xff, + 0x7f, 0x01, 0x00, 0x88, 0x88, 0x88, 0x00, 0x00, + 0x00, + /* EHT Operation */ + WLAN_EID_EXTENSION, 0x09, WLAN_EID_EXT_EHT_OPERATION, + 0x01, params->eht_mcs7_min_nss ? 
params->eht_mcs7_min_nss : 0x11, + 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, + }; + struct ieee80211_chan_req chanreq = {}; + struct cfg80211_chan_def ap_chandef = {}; + struct ieee802_11_elems *elems; + + if (params->strict) + set_bit(IEEE80211_HW_STRICT, t_sdata->local.hw.flags); + else + clear_bit(IEEE80211_HW_STRICT, t_sdata->local.hw.flags); + + t_sdata->sdata->u.mgd.ht_capa_mask = params->ht_capa_mask; + t_sdata->sdata->u.mgd.vht_capa = params->vht_capa; + t_sdata->sdata->u.mgd.vht_capa_mask = params->vht_capa_mask; + + if (params->userspace_selector) + set_bit(params->userspace_selector, userspace_selectors); + + rcu_assign_pointer(cbss.ies, + kunit_kzalloc(test, + sizeof(cbss) + sizeof(bss_ies), + GFP_KERNEL)); + KUNIT_ASSERT_NOT_NULL(test, rcu_access_pointer(cbss.ies)); + ((struct cfg80211_bss_ies *)rcu_access_pointer(cbss.ies))->len = sizeof(bss_ies); + + memcpy((void *)rcu_access_pointer(cbss.ies)->data, bss_ies, + sizeof(bss_ies)); + + rcu_read_lock(); + elems = ieee80211_determine_chan_mode(t_sdata->sdata, &conn, &cbss, + 0, &chanreq, &ap_chandef, + userspace_selectors); + rcu_read_unlock(); + + /* We do not need elems, free them if they are valid. 
*/ + if (!IS_ERR_OR_NULL(elems)) + kfree(elems); + + if (params->error) { + KUNIT_ASSERT_TRUE(test, IS_ERR(elems)); + KUNIT_ASSERT_EQ(test, PTR_ERR(elems), -params->error); + } else { + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, elems); + KUNIT_ASSERT_EQ(test, conn.mode, params->expected_mode); + } +} + +static struct kunit_case chan_mode_cases[] = { + KUNIT_CASE_PARAM(test_determine_chan_mode, + determine_chan_mode_gen_params), + {} +}; + +static struct kunit_suite chan_mode = { + .name = "mac80211-mlme-chan-mode", + .test_cases = chan_mode_cases, +}; + +kunit_test_suite(chan_mode); diff --git a/net/mac80211/tests/util.c b/net/mac80211/tests/util.c index 0936a73e3617..9c2d63a5cd2b 100644 --- a/net/mac80211/tests/util.c +++ b/net/mac80211/tests/util.c @@ -266,11 +266,7 @@ int t_sdata_init(struct kunit_resource *resource, void *ctx) cpu_to_le16(IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 14); + IEEE80211_VHT_MCS_SUPPORT_0_9 << 6); sband->vht_cap.vht_mcs.tx_mcs_map = sband->vht_cap.vht_mcs.rx_mcs_map; break; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a24636bda679..20179db88c4a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1182,7 +1182,8 @@ void ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) return; - if (!sta || !sta->sta.deflink.ht_cap.ht_supported || + if (!sta || + (!sta->sta.valid_links && !sta->sta.deflink.ht_cap.ht_supported) || !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || skb->protocol == sdata->control_port_protocol) return; @@ -5617,7 +5618,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!copy) return bcn; - ieee80211_tx_monitor(hw_to_local(hw), copy, 1, false, 
NULL); + ieee80211_tx_monitor(hw_to_local(hw), copy, 1, NULL); return bcn; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index fdda14c08e2b..dec6e16b8c7d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2156,7 +2156,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) wake_up: - if (local->monitors == local->open_count && local->monitors > 0) + if (local->virt_monitors > 0 && + local->virt_monitors == local->open_count) ieee80211_add_virtual_monitor(local); /* diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c index 3a8612309137..478b34b81919 100644 --- a/net/mac80211/wbrf.c +++ b/net/mac80211/wbrf.c @@ -2,6 +2,7 @@ /* * Wifi Band Exclusion Interface for WLAN * Copyright (C) 2023 Advanced Micro Devices + * Copyright (C) 2025 Intel Corporation * */ @@ -45,7 +46,7 @@ static void get_ranges_from_chandef(struct cfg80211_chan_def *chandef, u64 start_freq2, end_freq2; int bandwidth; - bandwidth = nl80211_chan_width_to_mhz(chandef->width); + bandwidth = cfg80211_chandef_get_width(chandef); get_chan_freq_boundary(chandef->center_freq1, bandwidth, &start_freq1, &end_freq1); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 21b7c3b280b4..ea1efef3572a 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -213,8 +213,8 @@ int ieee802154_register_hw(struct ieee802154_hw *hw) goto out_wq; } - hrtimer_init(&local->ifs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - local->ifs_timer.function = ieee802154_xmit_ifs_timer; + hrtimer_setup(&local->ifs_timer, ieee802154_xmit_ifs_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); wpan_phy_set_dev(local->phy, local->hw.parent); diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index bcf1dbf3a432..89bf6c47c818 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o \ - mptcp_pm_gen.o + mptcp_pm_gen.o 
pm_kernel.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 2dd81e6c26bd..d9290c5bb6c7 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -39,6 +39,7 @@ struct mptcp_pernet { u8 allow_join_initial_addr_port; u8 pm_type; char scheduler[MPTCP_SCHED_NAME_MAX]; + char path_manager[MPTCP_PM_NAME_MAX]; }; static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) @@ -83,6 +84,11 @@ int mptcp_get_pm_type(const struct net *net) return mptcp_get_pernet(net)->pm_type; } +const char *mptcp_get_path_manager(const struct net *net) +{ + return mptcp_get_pernet(net)->path_manager; +} + const char *mptcp_get_scheduler(const struct net *net) { return mptcp_get_pernet(net)->scheduler; @@ -101,6 +107,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) pernet->stale_loss_cnt = 4; pernet->pm_type = MPTCP_PM_TYPE_KERNEL; strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler)); + strscpy(pernet->path_manager, "kernel", sizeof(pernet->path_manager)); } #ifdef CONFIG_SYSCTL @@ -174,6 +181,96 @@ static int proc_blackhole_detect_timeout(const struct ctl_table *table, return ret; } +static int mptcp_set_path_manager(char *path_manager, const char *name) +{ + struct mptcp_pm_ops *pm_ops; + int ret = 0; + + rcu_read_lock(); + pm_ops = mptcp_pm_find(name); + if (pm_ops) + strscpy(path_manager, name, MPTCP_PM_NAME_MAX); + else + ret = -ENOENT; + rcu_read_unlock(); + + return ret; +} + +static int proc_path_manager(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct mptcp_pernet *pernet = container_of(ctl->data, + struct mptcp_pernet, + path_manager); + char (*path_manager)[MPTCP_PM_NAME_MAX] = ctl->data; + char pm_name[MPTCP_PM_NAME_MAX]; + const struct ctl_table tbl = { + .data = pm_name, + .maxlen = MPTCP_PM_NAME_MAX, + }; + int ret; + + strscpy(pm_name, *path_manager, MPTCP_PM_NAME_MAX); + + ret = 
proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { + ret = mptcp_set_path_manager(*path_manager, pm_name); + if (ret == 0) { + u8 pm_type = __MPTCP_PM_TYPE_NR; + + if (strncmp(pm_name, "kernel", MPTCP_PM_NAME_MAX) == 0) + pm_type = MPTCP_PM_TYPE_KERNEL; + else if (strncmp(pm_name, "userspace", MPTCP_PM_NAME_MAX) == 0) + pm_type = MPTCP_PM_TYPE_USERSPACE; + pernet->pm_type = pm_type; + } + } + + return ret; +} + +static int proc_pm_type(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct mptcp_pernet *pernet = container_of(ctl->data, + struct mptcp_pernet, + pm_type); + int ret; + + ret = proc_dou8vec_minmax(ctl, write, buffer, lenp, ppos); + if (write && ret == 0) { + u8 pm_type = READ_ONCE(*(u8 *)ctl->data); + char *pm_name = ""; + + if (pm_type == MPTCP_PM_TYPE_KERNEL) + pm_name = "kernel"; + else if (pm_type == MPTCP_PM_TYPE_USERSPACE) + pm_name = "userspace"; + mptcp_set_path_manager(pernet->path_manager, pm_name); + } + + return ret; +} + +static int proc_available_path_managers(const struct ctl_table *ctl, + int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + struct ctl_table tbl = { .maxlen = MPTCP_PM_BUF_MAX, }; + int ret; + + tbl.data = kmalloc(tbl.maxlen, GFP_USER); + if (!tbl.data) + return -ENOMEM; + + mptcp_pm_get_available(tbl.data, MPTCP_PM_BUF_MAX); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + kfree(tbl.data); + + return ret; +} + static struct ctl_table mptcp_sysctl_table[] = { { .procname = "enabled", @@ -218,7 +315,7 @@ static struct ctl_table mptcp_sysctl_table[] = { .procname = "pm_type", .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dou8vec_minmax, + .proc_handler = proc_pm_type, .extra1 = SYSCTL_ZERO, .extra2 = &mptcp_pm_type_max }, @@ -253,6 +350,18 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, + { + .procname = "path_manager", + .maxlen = MPTCP_PM_NAME_MAX, + .mode = 0644, + 
.proc_handler = proc_path_manager, + }, + { + .procname = "available_path_managers", + .maxlen = MPTCP_PM_BUF_MAX, + .mode = 0444, + .proc_handler = proc_available_path_managers, + }, }; static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) @@ -278,6 +387,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[8].data = &pernet->close_timeout; table[9].data = &pernet->blackhole_timeout; table[10].data = &pernet->syn_retrans_before_tcp_fallback; + table[11].data = &pernet->path_manager; + /* table[12] is for available_path_managers which is read-only info */ hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, ARRAY_SIZE(mptcp_sysctl_table)); @@ -401,26 +512,30 @@ void mptcp_active_enable(struct sock *sk) void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) { struct mptcp_subflow_context *subflow; + u8 timeouts, to_max; + struct net *net; - if (!sk_is_mptcp(ssk)) + /* Only check MPTCP SYN ... */ + if (likely(!sk_is_mptcp(ssk) || ssk->sk_state != TCP_SYN_SENT)) return; subflow = mptcp_subflow_ctx(ssk); - if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) { - struct net *net = sock_net(ssk); - u8 timeouts, to_max; + /* ... 
+ MP_CAPABLE */ + if (!subflow->request_mptcp) { + /* Mark as blackhole iif the 1st non-MPTCP SYN is accepted */ + subflow->mpc_drop = 0; + return; + } - timeouts = inet_csk(ssk)->icsk_retransmits; - to_max = mptcp_get_pernet(net)->syn_retrans_before_tcp_fallback; + net = sock_net(ssk); + timeouts = inet_csk(ssk)->icsk_retransmits; + to_max = mptcp_get_pernet(net)->syn_retrans_before_tcp_fallback; - if (timeouts == to_max || (timeouts < to_max && expired)) { - MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP); - subflow->mpc_drop = 1; - mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); - } - } else if (ssk->sk_state == TCP_SYN_SENT) { - subflow->mpc_drop = 0; + if (timeouts == to_max || (timeouts < to_max && expired)) { + MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP); + subflow->mpc_drop = 1; + mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); } } diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 02205f7994d7..70cf9ebce833 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -12,7 +12,7 @@ #include <net/netlink.h> #include "protocol.h" -static int subflow_get_info(struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb, bool net_admin) { struct mptcp_subflow_context *sf; struct nlattr *start; @@ -56,15 +56,6 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) if (nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_TOKEN_REM, sf->remote_token) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_TOKEN_LOC, sf->token) || - nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ, - sf->rel_write_seq) || - nla_put_u64_64bit(skb, MPTCP_SUBFLOW_ATTR_MAP_SEQ, sf->map_seq, - MPTCP_SUBFLOW_ATTR_PAD) || - nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_MAP_SFSEQ, - sf->map_subflow_seq) || - nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_SSN_OFFSET, sf->ssn_offset) || - nla_put_u16(skb, MPTCP_SUBFLOW_ATTR_MAP_DATALEN, - sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, 
MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { @@ -72,6 +63,21 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) goto nla_failure; } + /* Only export seq related counters to user with CAP_NET_ADMIN */ + if (net_admin && + (nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ, + sf->rel_write_seq) || + nla_put_u64_64bit(skb, MPTCP_SUBFLOW_ATTR_MAP_SEQ, sf->map_seq, + MPTCP_SUBFLOW_ATTR_PAD) || + nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_MAP_SFSEQ, + sf->map_subflow_seq) || + nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_SSN_OFFSET, sf->ssn_offset) || + nla_put_u16(skb, MPTCP_SUBFLOW_ATTR_MAP_DATALEN, + sf->map_data_len))) { + err = -EMSGSIZE; + goto nla_failure; + } + rcu_read_unlock(); unlock_sock_fast(sk, slow); nla_nest_end(skb, start); @@ -84,22 +90,26 @@ nla_failure: return err; } -static size_t subflow_get_info_size(const struct sock *sk) +static size_t subflow_get_info_size(const struct sock *sk, bool net_admin) { size_t size = 0; size += nla_total_size(0) + /* INET_ULP_INFO_MPTCP */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_TOKEN_REM */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_TOKEN_LOC */ - nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */ - nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */ - nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */ - nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ - nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_FLAGS */ nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_REM */ nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_LOC */ 0; + + if (net_admin) + size += nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */ + nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ + nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */ + 0; + return size; } diff --git 
a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index a29ff901df75..b9e451197902 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -40,17 +40,17 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf tp->copied_seq += skb->len; subflow->ssn_offset += skb->len; - /* initialize a dummy sequence number, we will update it at MPC - * completion, if needed - */ + /* Only the sequence delta is relevant */ MPTCP_SKB_CB(skb)->map_seq = -skb->len; MPTCP_SKB_CB(skb)->end_seq = 0; MPTCP_SKB_CB(skb)->offset = 0; MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + MPTCP_SKB_CB(skb)->cant_coalesce = 1; mptcp_data_lock(sk); + DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk)); - mptcp_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); mptcp_sk(sk)->bytes_received += skb->len; @@ -58,22 +58,3 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf mptcp_data_unlock(sk); } - -void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) -{ - struct sock *sk = (struct sock *)msk; - struct sk_buff *skb; - - skb = skb_peek_tail(&sk->sk_receive_queue); - if (skb) { - WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq); - pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk, - MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq, - MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq); - MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq; - MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq; - } - - pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq); -} diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 23949ae2a3a8..421ced031289 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -432,7 +432,6 @@ static void clear_3rdack_retransmission(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); sk_stop_timer(sk, 
&icsk->icsk_delack_timer); - icsk->icsk_ack.timeout = 0; icsk->icsk_ack.ato = 0; icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER); } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 16c336c51940..18b19dbccbba 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -5,12 +5,390 @@ */ #define pr_fmt(fmt) "MPTCP: " fmt -#include <linux/kernel.h> -#include <net/mptcp.h> +#include <linux/rculist.h> +#include <linux/spinlock.h> #include "protocol.h" - #include "mib.h" +#define ADD_ADDR_RETRANS_MAX 3 + +struct mptcp_pm_add_entry { + struct list_head list; + struct mptcp_addr_info addr; + u8 retrans_times; + struct timer_list add_timer; + struct mptcp_sock *sock; +}; + +static DEFINE_SPINLOCK(mptcp_pm_list_lock); +static LIST_HEAD(mptcp_pm_list); + +/* path manager helpers */ + +/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses, + * otherwise allow any matching local/remote pair + */ +bool mptcp_pm_addr_families_match(const struct sock *sk, + const struct mptcp_addr_info *loc, + const struct mptcp_addr_info *rem) +{ + bool mptcp_is_v4 = sk->sk_family == AF_INET; + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6); + bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6); + + if (mptcp_is_v4) + return loc_is_v4 && rem_is_v4; + + if (ipv6_only_sock(sk)) + return !loc_is_v4 && !rem_is_v4; + + return loc_is_v4 == rem_is_v4; +#else + return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET; +#endif +} + +bool mptcp_addresses_equal(const struct mptcp_addr_info *a, + const struct mptcp_addr_info *b, bool use_port) +{ + bool addr_equals = false; + + if (a->family == b->family) { + if (a->family == AF_INET) + addr_equals = a->addr.s_addr == b->addr.s_addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else + addr_equals = ipv6_addr_equal(&a->addr6, &b->addr6); + } else if (a->family == AF_INET) { + if (ipv6_addr_v4mapped(&b->addr6)) + addr_equals = 
a->addr.s_addr == b->addr6.s6_addr32[3]; + } else if (b->family == AF_INET) { + if (ipv6_addr_v4mapped(&a->addr6)) + addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; +#endif + } + + if (!addr_equals) + return false; + if (!use_port) + return true; + + return a->port == b->port; +} + +void mptcp_local_address(const struct sock_common *skc, + struct mptcp_addr_info *addr) +{ + addr->family = skc->skc_family; + addr->port = htons(skc->skc_num); + if (addr->family == AF_INET) + addr->addr.s_addr = skc->skc_rcv_saddr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (addr->family == AF_INET6) + addr->addr6 = skc->skc_v6_rcv_saddr; +#endif +} + +void mptcp_remote_address(const struct sock_common *skc, + struct mptcp_addr_info *addr) +{ + addr->family = skc->skc_family; + addr->port = skc->skc_dport; + if (addr->family == AF_INET) + addr->addr.s_addr = skc->skc_daddr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (addr->family == AF_INET6) + addr->addr6 = skc->skc_v6_daddr; +#endif +} + +static bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *remote) +{ + struct mptcp_addr_info mpc_remote; + + mptcp_remote_address((struct sock_common *)msk, &mpc_remote); + return mptcp_addresses_equal(&mpc_remote, remote, remote->port); +} + +bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, + const struct mptcp_addr_info *saddr) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info cur; + struct sock_common *skc; + + list_for_each_entry(subflow, list, node) { + skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); + + mptcp_local_address(skc, &cur); + if (mptcp_addresses_equal(&cur, saddr, saddr->port)) + return true; + } + + return false; +} + +static struct mptcp_pm_add_entry * +mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + struct mptcp_pm_add_entry *entry; + + lockdep_assert_held(&msk->pm.lock); + + list_for_each_entry(entry, &msk->pm.anno_list, 
list) { + if (mptcp_addresses_equal(&entry->addr, addr, true)) + return entry; + } + + return NULL; +} + +bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + struct mptcp_pm_add_entry *entry; + + entry = mptcp_pm_del_add_timer(msk, addr, false); + kfree(entry); + return entry; +} + +bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) +{ + struct mptcp_pm_add_entry *entry; + struct mptcp_addr_info saddr; + bool ret = false; + + mptcp_local_address((struct sock_common *)sk, &saddr); + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.anno_list, list) { + if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { + ret = true; + goto out; + } + } + +out: + spin_unlock_bh(&msk->pm.lock); + return ret; +} + +static void __mptcp_pm_send_ack(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + bool prio, bool backup) +{ + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + + pr_debug("send ack for %s\n", + prio ? "mp_prio" : + (mptcp_pm_should_add_signal(msk) ? 
"add_addr" : "rm_addr")); + + slow = lock_sock_fast(ssk); + if (prio) { + subflow->send_mp_prio = 1; + subflow->request_bkup = backup; + } + + __mptcp_subflow_send_ack(ssk); + unlock_sock_fast(ssk, slow); +} + +void mptcp_pm_send_ack(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + bool prio, bool backup) +{ + spin_unlock_bh(&msk->pm.lock); + __mptcp_pm_send_ack(msk, subflow, prio, backup); + spin_lock_bh(&msk->pm.lock); +} + +void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow, *alt = NULL; + + msk_owned_by_me(msk); + lockdep_assert_held(&msk->pm.lock); + + if (!mptcp_pm_should_add_signal(msk) && + !mptcp_pm_should_rm_signal(msk)) + return; + + mptcp_for_each_subflow(msk, subflow) { + if (__mptcp_subflow_active(subflow)) { + if (!subflow->stale) { + mptcp_pm_send_ack(msk, subflow, false, false); + return; + } + + if (!alt) + alt = subflow; + } + } + + if (alt) + mptcp_pm_send_ack(msk, alt, false, false); +} + +int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + struct mptcp_addr_info *rem, + u8 bkup) +{ + struct mptcp_subflow_context *subflow; + + pr_debug("bkup=%d\n", bkup); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct mptcp_addr_info local, remote; + + mptcp_local_address((struct sock_common *)ssk, &local); + if (!mptcp_addresses_equal(&local, addr, addr->port)) + continue; + + if (rem && rem->family != AF_UNSPEC) { + mptcp_remote_address((struct sock_common *)ssk, &remote); + if (!mptcp_addresses_equal(&remote, rem, rem->port)) + continue; + } + + __mptcp_pm_send_ack(msk, subflow, true, bkup); + return 0; + } + + return -EINVAL; +} + +static void mptcp_pm_add_timer(struct timer_list *timer) +{ + struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); + struct mptcp_sock *msk = entry->sock; + struct sock *sk = (struct sock *)msk; + + pr_debug("msk=%p\n", msk); + + if (!msk) + return; + + if 
(inet_sk_state_load(sk) == TCP_CLOSE) + return; + + if (!entry->addr.id) + return; + + if (mptcp_pm_should_add_signal_addr(msk)) { + sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + goto out; + } + + spin_lock_bh(&msk->pm.lock); + + if (!mptcp_pm_should_add_signal_addr(msk)) { + pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); + mptcp_pm_announce_addr(msk, &entry->addr, false); + mptcp_pm_add_addr_send_ack(msk); + entry->retrans_times++; + } + + if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) + sk_reset_timer(sk, timer, + jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); + + spin_unlock_bh(&msk->pm.lock); + + if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) + mptcp_pm_subflow_established(msk); + +out: + __sock_put(sk); +} + +struct mptcp_pm_add_entry * +mptcp_pm_del_add_timer(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr, bool check_id) +{ + struct mptcp_pm_add_entry *entry; + struct sock *sk = (struct sock *)msk; + struct timer_list *add_timer = NULL; + + spin_lock_bh(&msk->pm.lock); + entry = mptcp_lookup_anno_list_by_saddr(msk, addr); + if (entry && (!check_id || entry->addr.id == addr->id)) { + entry->retrans_times = ADD_ADDR_RETRANS_MAX; + add_timer = &entry->add_timer; + } + if (!check_id && entry) + list_del(&entry->list); + spin_unlock_bh(&msk->pm.lock); + + /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ + if (add_timer) + sk_stop_timer_sync(sk, add_timer); + + return entry; +} + +bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + struct mptcp_pm_add_entry *add_entry = NULL; + struct sock *sk = (struct sock *)msk; + struct net *net = sock_net(sk); + + lockdep_assert_held(&msk->pm.lock); + + add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); + + if (add_entry) { + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) + return false; + + sk_reset_timer(sk, &add_entry->add_timer, + jiffies + mptcp_get_add_addr_timeout(net)); + return true; + } + + add_entry = 
kmalloc(sizeof(*add_entry), GFP_ATOMIC); + if (!add_entry) + return false; + + list_add(&add_entry->list, &msk->pm.anno_list); + + add_entry->addr = *addr; + add_entry->sock = msk; + add_entry->retrans_times = 0; + + timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); + sk_reset_timer(sk, &add_entry->add_timer, + jiffies + mptcp_get_add_addr_timeout(net)); + + return true; +} + +static void mptcp_pm_free_anno_list(struct mptcp_sock *msk) +{ + struct mptcp_pm_add_entry *entry, *tmp; + struct sock *sk = (struct sock *)msk; + LIST_HEAD(free_list); + + pr_debug("msk=%p\n", msk); + + spin_lock_bh(&msk->pm.lock); + list_splice_init(&msk->pm.anno_list, &free_list); + spin_unlock_bh(&msk->pm.lock); + + list_for_each_entry_safe(entry, tmp, &free_list, list) { + sk_stop_timer_sync(sk, &entry->add_timer); + kfree(entry); + } +} + /* path manager command handlers */ int mptcp_pm_announce_addr(struct mptcp_sock *msk, @@ -56,7 +434,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ msk->pm.rm_list_tx = *rm_list; rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); WRITE_ONCE(msk->pm.addr_signal, rm_addr); - mptcp_pm_nl_addr_send_ack(msk); + mptcp_pm_addr_send_ack(msk); return 0; } @@ -138,13 +516,13 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) * be sure to serve this event only once. 
*/ if (READ_ONCE(pm->work_pending) && - !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) + !(pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); - if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) + if ((pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) announce = true; - msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); + pm->status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); spin_unlock_bh(&pm->lock); if (announce) @@ -230,7 +608,7 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); } /* id0 should not have a different address */ - } else if ((addr->id == 0 && !mptcp_pm_nl_is_init_remote_addr(msk, addr)) || + } else if ((addr->id == 0 && !mptcp_pm_is_init_remote_addr(msk, addr)) || (addr->id > 0 && !READ_ONCE(pm->accept_addr))) { mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); @@ -250,6 +628,9 @@ void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, pr_debug("msk=%p\n", msk); + if (!READ_ONCE(pm->work_pending)) + return; + spin_lock_bh(&pm->lock); if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending)) @@ -266,6 +647,80 @@ void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); } +static void mptcp_pm_rm_addr_or_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list, + enum linux_mptcp_mib_field rm_type) +{ + struct mptcp_subflow_context *subflow, *tmp; + struct sock *sk = (struct sock *)msk; + u8 i; + + pr_debug("%s rm_list_nr %d\n", + rm_type == MPTCP_MIB_RMADDR ? 
"address" : "subflow", rm_list->nr); + + msk_owned_by_me(msk); + + if (sk->sk_state == TCP_LISTEN) + return; + + if (!rm_list->nr) + return; + + if (list_empty(&msk->conn_list)) + return; + + for (i = 0; i < rm_list->nr; i++) { + u8 rm_id = rm_list->ids[i]; + bool removed = false; + + mptcp_for_each_subflow_safe(msk, subflow, tmp) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); + int how = RCV_SHUTDOWN | SEND_SHUTDOWN; + u8 id = subflow_get_local_id(subflow); + + if ((1 << inet_sk_state_load(ssk)) & + (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE)) + continue; + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) + continue; + if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) + continue; + + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", + rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", + i, rm_id, id, remote_id, msk->mpc_endpoint_id); + spin_unlock_bh(&msk->pm.lock); + mptcp_subflow_shutdown(sk, ssk, how); + removed |= subflow->request_join; + + /* the following takes care of updating the subflows counter */ + mptcp_close_ssk(sk, ssk, subflow); + spin_lock_bh(&msk->pm.lock); + + if (rm_type == MPTCP_MIB_RMSUBFLOW) + __MPTCP_INC_STATS(sock_net(sk), rm_type); + } + + if (rm_type == MPTCP_MIB_RMADDR) { + __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (removed && mptcp_pm_is_kernel(msk)) + mptcp_pm_nl_rm_addr(msk, rm_id); + } + } +} + +static void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk) +{ + mptcp_pm_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); +} + +void mptcp_pm_rm_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) +{ + mptcp_pm_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); +} + void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) { @@ -321,8 +776,6 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) } } -/* path manager helpers */ - bool 
mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, unsigned int opt_size, unsigned int remaining, struct mptcp_addr_info *addr, bool *echo, @@ -402,7 +855,7 @@ out_unlock: int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) { - struct mptcp_addr_info skc_local; + struct mptcp_pm_addr_entry skc_local = { 0 }; struct mptcp_addr_info msk_local; if (WARN_ON_ONCE(!msk)) @@ -412,10 +865,13 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) * addr */ mptcp_local_address((struct sock_common *)msk, &msk_local); - mptcp_local_address((struct sock_common *)skc, &skc_local); - if (mptcp_addresses_equal(&msk_local, &skc_local, false)) + mptcp_local_address((struct sock_common *)skc, &skc_local.addr); + if (mptcp_addresses_equal(&msk_local, &skc_local.addr, false)) return 0; + skc_local.addr.id = 0; + skc_local.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; + if (mptcp_pm_is_userspace(msk)) return mptcp_userspace_pm_get_local_id(msk, &skc_local); return mptcp_pm_nl_get_local_id(msk, &skc_local); @@ -433,27 +889,41 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_is_backup(msk, &skc_local); } -int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info) -{ - if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_get_addr(skb, info); - return mptcp_pm_nl_get_addr(skb, info); -} - -int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) +static void mptcp_pm_subflows_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) { - const struct genl_info *info = genl_info_dump(cb); - - if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_dump_addr(msg, cb); - return mptcp_pm_nl_dump_addr(msg, cb); -} + struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = (struct sock *)msk; + unsigned int active_max_loss_cnt; + struct net *net = sock_net(sk); + unsigned int stale_loss_cnt; + bool slow; + + 
stale_loss_cnt = mptcp_stale_loss_cnt(net); + if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) + return; -int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info) -{ - if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_set_flags(skb, info); - return mptcp_pm_nl_set_flags(skb, info); + /* look for another available subflow not in loss state */ + active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); + mptcp_for_each_subflow(msk, iter) { + if (iter != subflow && mptcp_subflow_active(iter) && + iter->stale_count < active_max_loss_cnt) { + /* we have some alternatives, try to mark this subflow as idle ...*/ + slow = lock_sock_fast(ssk); + if (!tcp_rtx_and_write_queues_empty(ssk)) { + subflow->stale = 1; + __mptcp_retransmit_pending_data(sk); + MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE); + } + unlock_sock_fast(ssk, slow); + + /* always try to push the pending data regardless of re-injections: + * we can possibly use backup subflows now, and subflow selection + * is cheap under the msk socket lock + */ + __mptcp_push_pending(sk, 0); + return; + } + } } void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) @@ -468,36 +938,44 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { if (subflow->stale_count < U8_MAX) subflow->stale_count++; - mptcp_pm_nl_subflow_chk_stale(msk, ssk); + mptcp_pm_subflows_chk_stale(msk, ssk); } else { subflow->stale_count = 0; mptcp_subflow_set_active(subflow); } } -/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses, - * otherwise allow any matching local/remote pair - */ -bool mptcp_pm_addr_families_match(const struct sock *sk, - const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *rem) +void mptcp_pm_worker(struct mptcp_sock *msk) { - bool mptcp_is_v4 = sk->sk_family == AF_INET; + struct mptcp_pm_data *pm = &msk->pm; -#if 
IS_ENABLED(CONFIG_MPTCP_IPV6) - bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6); - bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6); + msk_owned_by_me(msk); - if (mptcp_is_v4) - return loc_is_v4 && rem_is_v4; + if (!(pm->status & MPTCP_PM_WORK_MASK)) + return; - if (ipv6_only_sock(sk)) - return !loc_is_v4 && !rem_is_v4; + spin_lock_bh(&msk->pm.lock); - return loc_is_v4 == rem_is_v4; -#else - return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET; -#endif + pr_debug("msk=%p status=%x\n", msk, pm->status); + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); + mptcp_pm_addr_send_ack(msk); + } + if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); + mptcp_pm_rm_addr_recv(msk); + } + __mptcp_pm_kernel_worker(msk); + + spin_unlock_bh(&msk->pm.lock); +} + +void mptcp_pm_destroy(struct mptcp_sock *msk) +{ + mptcp_pm_free_anno_list(msk); + + if (mptcp_pm_is_userspace(msk)) + mptcp_userspace_pm_free_local_addr_list(msk); } void mptcp_pm_data_reset(struct mptcp_sock *msk) @@ -505,10 +983,7 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk) u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk)); struct mptcp_pm_data *pm = &msk->pm; - pm->add_addr_signaled = 0; - pm->add_addr_accepted = 0; - pm->local_addr_used = 0; - pm->subflows = 0; + memset(&pm->reset, 0, sizeof(pm->reset)); pm->rm_list_tx.nr = 0; pm->rm_list_rx.nr = 0; WRITE_ONCE(pm->pm_type, pm_type); @@ -527,16 +1002,9 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk) !!mptcp_pm_get_add_addr_accept_max(msk) && subflows_allowed); WRITE_ONCE(pm->accept_subflow, subflows_allowed); - } else { - WRITE_ONCE(pm->work_pending, 0); - WRITE_ONCE(pm->accept_addr, 0); - WRITE_ONCE(pm->accept_subflow, 0); - } - WRITE_ONCE(pm->addr_signal, 0); - WRITE_ONCE(pm->remote_deny_join_id0, false); - pm->status = 0; - bitmap_fill(msk->pm.id_avail_bitmap, 
MPTCP_PM_MAX_ADDR_ID + 1); + bitmap_fill(pm->id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + } } void mptcp_pm_data_init(struct mptcp_sock *msk) @@ -549,5 +1017,75 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) void __init mptcp_pm_init(void) { + mptcp_pm_kernel_register(); + mptcp_pm_userspace_register(); mptcp_pm_nl_init(); } + +/* Must be called with rcu read lock held */ +struct mptcp_pm_ops *mptcp_pm_find(const char *name) +{ + struct mptcp_pm_ops *pm_ops; + + list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) { + if (!strcmp(pm_ops->name, name)) + return pm_ops; + } + + return NULL; +} + +int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops) +{ + return 0; +} + +int mptcp_pm_register(struct mptcp_pm_ops *pm_ops) +{ + int ret; + + ret = mptcp_pm_validate(pm_ops); + if (ret) + return ret; + + spin_lock(&mptcp_pm_list_lock); + if (mptcp_pm_find(pm_ops->name)) { + spin_unlock(&mptcp_pm_list_lock); + return -EEXIST; + } + list_add_tail_rcu(&pm_ops->list, &mptcp_pm_list); + spin_unlock(&mptcp_pm_list_lock); + + pr_debug("%s registered\n", pm_ops->name); + return 0; +} + +void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops) +{ + /* skip unregistering the default path manager */ + if (WARN_ON_ONCE(pm_ops == &mptcp_pm_kernel)) + return; + + spin_lock(&mptcp_pm_list_lock); + list_del_rcu(&pm_ops->list); + spin_unlock(&mptcp_pm_list_lock); +} + +/* Build string with list of available path manager values. + * Similar to tcp_get_available_congestion_control() + */ +void mptcp_pm_get_available(char *buf, size_t maxlen) +{ + struct mptcp_pm_ops *pm_ops; + size_t offs = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) { + offs += snprintf(buf + offs, maxlen - offs, "%s%s", + offs == 0 ? 
"" : " ", pm_ops->name); + + if (WARN_ON_ONCE(offs >= maxlen)) + break; + } + rcu_read_unlock(); +} diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c new file mode 100644 index 000000000000..d39e7c178460 --- /dev/null +++ b/net/mptcp/pm_kernel.c @@ -0,0 +1,1412 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2025, Matthieu Baerts. + */ + +#define pr_fmt(fmt) "MPTCP: " fmt + +#include <net/netns/generic.h> + +#include "protocol.h" +#include "mib.h" +#include "mptcp_pm_gen.h" + +static int pm_nl_pernet_id; + +struct pm_nl_pernet { + /* protects pernet updates */ + spinlock_t lock; + struct list_head local_addr_list; + unsigned int addrs; + unsigned int stale_loss_cnt; + unsigned int add_addr_signal_max; + unsigned int add_addr_accept_max; + unsigned int local_addr_max; + unsigned int subflows_max; + unsigned int next_id; + DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); +}; + +#define MPTCP_PM_ADDR_MAX 8 + +static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) +{ + return net_generic(net, pm_nl_pernet_id); +} + +static struct pm_nl_pernet * +pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) +{ + return pm_nl_get_pernet(sock_net((struct sock *)msk)); +} + +static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) +{ + return pm_nl_get_pernet(genl_info_net(info)); +} + +unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) +{ + const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + return READ_ONCE(pernet->add_addr_signal_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); + +unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + return READ_ONCE(pernet->add_addr_accept_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); + +unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet = 
pm_nl_get_pernet_from_msk(msk); + + return READ_ONCE(pernet->subflows_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); + +unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + return READ_ONCE(pernet->local_addr_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); + +static bool lookup_subflow_by_daddr(const struct list_head *list, + const struct mptcp_addr_info *daddr) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info cur; + + list_for_each_entry(subflow, list, node) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + if (!((1 << inet_sk_state_load(ssk)) & + (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) + continue; + + mptcp_remote_address((struct sock_common *)ssk, &cur); + if (mptcp_addresses_equal(&cur, daddr, daddr->port)) + return true; + } + + return false; +} + +static bool +select_local_address(const struct pm_nl_pernet *pernet, + const struct mptcp_sock *msk, + struct mptcp_pm_local *new_local) +{ + struct mptcp_pm_addr_entry *entry; + bool found = false; + + msk_owned_by_me(msk); + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) + continue; + + if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) + continue; + + new_local->addr = entry->addr; + new_local->flags = entry->flags; + new_local->ifindex = entry->ifindex; + found = true; + break; + } + rcu_read_unlock(); + + return found; +} + +static bool +select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, + struct mptcp_pm_local *new_local) +{ + struct mptcp_pm_addr_entry *entry; + bool found = false; + + rcu_read_lock(); + /* do not keep any additional per socket state, just signal + * the address list in order. + * Note: removal from the local address list during the msk life-cycle + * can lead to additional addresses not being announced. 
+ */ + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) + continue; + + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) + continue; + + new_local->addr = entry->addr; + new_local->flags = entry->flags; + new_local->ifindex = entry->ifindex; + found = true; + break; + } + rcu_read_unlock(); + + return found; +} + +/* Fill all the remote addresses into the array addrs[], + * and return the array size. + */ +static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *local, + bool fullmesh, + struct mptcp_addr_info *addrs) +{ + bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); + struct sock *sk = (struct sock *)msk, *ssk; + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info remote = { 0 }; + unsigned int subflows_max; + int i = 0; + + subflows_max = mptcp_pm_get_subflows_max(msk); + mptcp_remote_address((struct sock_common *)sk, &remote); + + /* Non-fullmesh endpoint, fill in the single entry + * corresponding to the primary MPC subflow remote address + */ + if (!fullmesh) { + if (deny_id0) + return 0; + + if (!mptcp_pm_addr_families_match(sk, local, &remote)) + return 0; + + msk->pm.subflows++; + addrs[i++] = remote; + } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + + mptcp_for_each_subflow(msk, subflow) { + ssk = mptcp_subflow_tcp_sock(subflow); + mptcp_remote_address((struct sock_common *)ssk, &addrs[i]); + addrs[i].id = READ_ONCE(subflow->remote_id); + if (deny_id0 && !addrs[i].id) + continue; + + if (test_bit(addrs[i].id, unavail_id)) + continue; + + if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) + continue; 
+ + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple address towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); + msk->pm.subflows++; + i++; + } + } + } + + return i; +} + +static struct mptcp_pm_addr_entry * +__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, + lockdep_is_held(&pernet->lock)) { + if (entry->addr.id == id) + return entry; + } + return NULL; +} + +static struct mptcp_pm_addr_entry * +__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, + lockdep_is_held(&pernet->lock)) { + if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) + return entry; + } + return NULL; +} + +static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) +{ + struct sock *sk = (struct sock *)msk; + unsigned int add_addr_signal_max; + bool signal_and_subflow = false; + unsigned int local_addr_max; + struct pm_nl_pernet *pernet; + struct mptcp_pm_local local; + unsigned int subflows_max; + + pernet = pm_nl_get_pernet(sock_net(sk)); + + add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); + local_addr_max = mptcp_pm_get_local_addr_max(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); + + /* do lazy endpoint usage accounting for the MPC subflows */ + if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); + struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info mpc_addr; + bool backup = false; + + mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); + rcu_read_lock(); + entry = __lookup_addr(pernet, &mpc_addr); + if (entry) { + __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); + msk->mpc_endpoint_id = entry->addr.id; + backup = 
!!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + } + rcu_read_unlock(); + + if (backup) + mptcp_pm_send_ack(msk, subflow, true, backup); + + msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); + } + + pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", + msk->pm.local_addr_used, local_addr_max, + msk->pm.add_addr_signaled, add_addr_signal_max, + msk->pm.subflows, subflows_max); + + /* check first for announce */ + if (msk->pm.add_addr_signaled < add_addr_signal_max) { + /* due to racing events on both ends we can reach here while + * previous add address is still running: if we invoke now + * mptcp_pm_announce_addr(), that will fail and the + * corresponding id will be marked as used. + * Instead let the PM machinery reschedule us when the + * current address announce will be completed. + */ + if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) + return; + + if (!select_signal_address(pernet, msk, &local)) + goto subflow; + + /* If the alloc fails, we are on memory pressure, not worth + * continuing, and trying to create subflows. 
+ */ + if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) + return; + + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled++; + + /* Special case for ID0: set the correct ID */ + if (local.addr.id == msk->mpc_endpoint_id) + local.addr.id = 0; + + mptcp_pm_announce_addr(msk, &local.addr, false); + mptcp_pm_addr_send_ack(msk); + + if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = true; + } + +subflow: + /* check if should create a new subflow */ + while (msk->pm.local_addr_used < local_addr_max && + msk->pm.subflows < subflows_max) { + struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; + bool fullmesh; + int i, nr; + + if (signal_and_subflow) + signal_and_subflow = false; + else if (!select_local_address(pernet, msk, &local)) + break; + + fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + + /* Special case for ID0: set the correct ID */ + if (local.addr.id == msk->mpc_endpoint_id) + local.addr.id = 0; + else /* local_addr_used is not decr for ID 0 */ + msk->pm.local_addr_used++; + + nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); + if (nr == 0) + continue; + + spin_unlock_bh(&msk->pm.lock); + for (i = 0; i < nr; i++) + __mptcp_subflow_connect(sk, &local, &addrs[i]); + spin_lock_bh(&msk->pm.lock); + } + mptcp_pm_nl_check_work_pending(msk); +} + +static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) +{ + mptcp_pm_create_subflow_or_signal_addr(msk); +} + +static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) +{ + mptcp_pm_create_subflow_or_signal_addr(msk); +} + +/* Fill all the local addresses into the array addrs[], + * and return the array size. 
+ */ +static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *remote, + struct mptcp_pm_local *locals) +{ + struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info mpc_addr; + struct pm_nl_pernet *pernet; + unsigned int subflows_max; + int i = 0; + + pernet = pm_nl_get_pernet_from_msk(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); + + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) + continue; + + if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) + continue; + + if (msk->pm.subflows < subflows_max) { + locals[i].addr = entry->addr; + locals[i].flags = entry->flags; + locals[i].ifindex = entry->ifindex; + + /* Special case for ID0: set the correct ID */ + if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) + locals[i].addr.id = 0; + + msk->pm.subflows++; + i++; + } + } + rcu_read_unlock(); + + /* If the array is empty, fill in the single + * 'IPADDRANY' local address + */ + if (!i) { + memset(&locals[i], 0, sizeof(locals[i])); + locals[i].addr.family = +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + remote->family == AF_INET6 && + ipv6_addr_v4mapped(&remote->addr6) ? 
AF_INET : +#endif + remote->family; + + if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) + return 0; + + msk->pm.subflows++; + i++; + } + + return i; +} + +static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) +{ + struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; + struct sock *sk = (struct sock *)msk; + unsigned int add_addr_accept_max; + struct mptcp_addr_info remote; + unsigned int subflows_max; + bool sf_created = false; + int i, nr; + + add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); + + pr_debug("accepted %d:%d remote family %d\n", + msk->pm.add_addr_accepted, add_addr_accept_max, + msk->pm.remote.family); + + remote = msk->pm.remote; + mptcp_pm_announce_addr(msk, &remote, true); + mptcp_pm_addr_send_ack(msk); + + if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) + return; + + /* pick id 0 port, if none is provided the remote address */ + if (!remote.port) + remote.port = sk->sk_dport; + + /* connect to the specified remote address, using whatever + * local address the routing configuration will pick. + */ + nr = fill_local_addresses_vec(msk, &remote, locals); + if (nr == 0) + return; + + spin_unlock_bh(&msk->pm.lock); + for (i = 0; i < nr; i++) + if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) + sf_created = true; + spin_lock_bh(&msk->pm.lock); + + if (sf_created) { + /* add_addr_accepted is not decr for ID 0 */ + if (remote.id) + msk->pm.add_addr_accepted++; + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) + WRITE_ONCE(msk->pm.accept_addr, false); + } +} + +void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) +{ + if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { + /* Note: if the subflow has been closed before, this + * add_addr_accepted counter will not be decremented. 
+ */ + if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) + WRITE_ONCE(msk->pm.accept_addr, true); + } +} + +static bool address_use_port(struct mptcp_pm_addr_entry *entry) +{ + return (entry->flags & + (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == + MPTCP_PM_ADDR_FLAG_SIGNAL; +} + +/* caller must ensure the RCU grace period is already elapsed */ +static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) +{ + if (entry->lsk) + sock_release(entry->lsk); + kfree(entry); +} + +static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, + struct mptcp_pm_addr_entry *entry, + bool needs_id, bool replace) +{ + struct mptcp_pm_addr_entry *cur, *del_entry = NULL; + unsigned int addr_max; + int ret = -EINVAL; + + spin_lock_bh(&pernet->lock); + /* to keep the code simple, don't do IDR-like allocation for address ID, + * just bail when we exceed limits + */ + if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) + pernet->next_id = 1; + if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { + ret = -ERANGE; + goto out; + } + if (test_bit(entry->addr.id, pernet->id_bitmap)) { + ret = -EBUSY; + goto out; + } + + /* do not insert duplicate address, differentiate on port only + * singled addresses + */ + if (!address_use_port(entry)) + entry->addr.port = 0; + list_for_each_entry(cur, &pernet->local_addr_list, list) { + if (mptcp_addresses_equal(&cur->addr, &entry->addr, + cur->addr.port || entry->addr.port)) { + /* allow replacing the exiting endpoint only if such + * endpoint is an implicit one and the user-space + * did not provide an endpoint id + */ + if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { + ret = -EEXIST; + goto out; + } + if (entry->addr.id) + goto out; + + /* allow callers that only need to look up the local + * addr's id to skip replacement. This allows them to + * avoid calling synchronize_rcu in the packet recv + * path. 
+ */ + if (!replace) { + kfree(entry); + ret = cur->addr.id; + goto out; + } + + pernet->addrs--; + entry->addr.id = cur->addr.id; + list_del_rcu(&cur->list); + del_entry = cur; + break; + } + } + + if (!entry->addr.id && needs_id) { +find_next: + entry->addr.id = find_next_zero_bit(pernet->id_bitmap, + MPTCP_PM_MAX_ADDR_ID + 1, + pernet->next_id); + if (!entry->addr.id && pernet->next_id != 1) { + pernet->next_id = 1; + goto find_next; + } + } + + if (!entry->addr.id && needs_id) + goto out; + + __set_bit(entry->addr.id, pernet->id_bitmap); + if (entry->addr.id > pernet->next_id) + pernet->next_id = entry->addr.id; + + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + addr_max = pernet->add_addr_signal_max; + WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); + } + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + addr_max = pernet->local_addr_max; + WRITE_ONCE(pernet->local_addr_max, addr_max + 1); + } + + pernet->addrs++; + if (!entry->addr.port) + list_add_tail_rcu(&entry->list, &pernet->local_addr_list); + else + list_add_rcu(&entry->list, &pernet->local_addr_list); + ret = entry->addr.id; + +out: + spin_unlock_bh(&pernet->lock); + + /* just replaced an existing entry, free it */ + if (del_entry) { + synchronize_rcu(); + __mptcp_pm_release_addr_entry(del_entry); + } + return ret; +} + +static struct lock_class_key mptcp_slock_keys[2]; +static struct lock_class_key mptcp_keys[2]; + +static int mptcp_pm_nl_create_listen_socket(struct sock *sk, + struct mptcp_pm_addr_entry *entry) +{ + bool is_ipv6 = sk->sk_family == AF_INET6; + int addrlen = sizeof(struct sockaddr_in); + struct sockaddr_storage addr; + struct sock *newsk, *ssk; + int backlog = 1024; + int err; + + err = sock_create_kern(sock_net(sk), entry->addr.family, + SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); + if (err) + return err; + + newsk = entry->lsk->sk; + if (!newsk) + return -EINVAL; + + /* The subflow socket lock is acquired in a nested to the msk one + * in several places, even by the TCP 
stack, and this msk is a kernel + * socket: lockdep complains. Instead of propagating the _nested + * modifiers in several places, re-init the lock class for the msk + * socket to an mptcp specific one. + */ + sock_lock_init_class_and_name(newsk, + is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", + &mptcp_slock_keys[is_ipv6], + is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", + &mptcp_keys[is_ipv6]); + + lock_sock(newsk); + ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); + release_sock(newsk); + if (IS_ERR(ssk)) + return PTR_ERR(ssk); + + mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (entry->addr.family == AF_INET6) + addrlen = sizeof(struct sockaddr_in6); +#endif + if (ssk->sk_family == AF_INET) + err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (ssk->sk_family == AF_INET6) + err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); +#endif + if (err) + return err; + + /* We don't use mptcp_set_state() here because it needs to be called + * under the msk socket lock. For the moment, that will not bring + * anything more than only calling inet_sk_state_store(), because the + * old status is known (TCP_CLOSE). + */ + inet_sk_state_store(newsk, TCP_LISTEN); + lock_sock(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); + err = __inet_listen_sk(ssk, backlog); + if (!err) + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); + release_sock(ssk); + return err; +} + +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc) +{ + struct mptcp_pm_addr_entry *entry; + struct pm_nl_pernet *pernet; + int ret; + + pernet = pm_nl_get_pernet_from_msk(msk); + + rcu_read_lock(); + entry = __lookup_addr(pernet, &skc->addr); + ret = entry ? 
entry->addr.id : -1; + rcu_read_unlock(); + if (ret >= 0) + return ret; + + /* address not found, add to local list */ + entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC); + if (!entry) + return -ENOMEM; + + entry->addr.port = 0; + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); + if (ret < 0) + kfree(entry); + + return ret; +} + +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + struct mptcp_pm_addr_entry *entry; + bool backup; + + rcu_read_lock(); + entry = __lookup_addr(pernet, skc); + backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + rcu_read_unlock(); + + return backup; +} + +static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, + struct mptcp_addr_info *addr) +{ + struct mptcp_sock *msk; + long s_slot = 0, s_num = 0; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info mpc_addr; + + if (!READ_ONCE(msk->fully_established) || + mptcp_pm_is_userspace(msk)) + goto next; + + /* if the endp linked to the init sf is re-added with a != ID */ + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) + msk->mpc_endpoint_id = addr->id; + mptcp_pm_create_subflow_or_signal_addr(msk); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_address_nl_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + +/* Add an MPTCP endpoint */ +int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct 
pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct mptcp_pm_addr_entry addr, *entry; + struct nlattr *attr; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + ret = mptcp_pm_parse_entry(attr, info, true, &addr); + if (ret < 0) + return ret; + + if (addr.addr.port && !address_use_port(&addr)) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "flags must have signal and not subflow when using port"); + return -EINVAL; + } + + if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && + addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "flags mustn't have both signal and fullmesh"); + return -EINVAL; + } + + if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "can't create IMPLICIT endpoint"); + return -EINVAL; + } + + entry = kmemdup(&addr, sizeof(addr), GFP_KERNEL_ACCOUNT); + if (!entry) { + GENL_SET_ERR_MSG(info, "can't allocate addr"); + return -ENOMEM; + } + + if (entry->addr.port) { + ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); + if (ret) { + GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); + goto out_free; + } + } + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info), + true); + if (ret < 0) { + GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); + goto out_free; + } + + mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); + return 0; + +out_free: + __mptcp_pm_release_addr_entry(entry); + return ret; +} + +static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + return msk->mpc_endpoint_id == addr->id ? 
0 : addr->id; +} + +static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr, + bool force) +{ + struct mptcp_rm_list list = { .nr = 0 }; + bool ret; + + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); + + ret = mptcp_remove_anno_list_by_saddr(msk, addr); + if (ret || force) { + spin_lock_bh(&msk->pm.lock); + if (ret) { + __set_bit(addr->id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled--; + } + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + } + return ret; +} + +static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) +{ + /* If it was marked as used, and not ID 0, decrement local_addr_used */ + if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && + id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) + msk->pm.local_addr_used--; +} + +static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, + const struct mptcp_pm_addr_entry *entry) +{ + const struct mptcp_addr_info *addr = &entry->addr; + struct mptcp_rm_list list = { .nr = 1 }; + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + pr_debug("remove_id=%d\n", addr->id); + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + bool remove_subflow; + + if (mptcp_pm_is_userspace(msk)) + goto next; + + lock_sock(sk); + remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); + mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && + !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); + + list.ids[0] = mptcp_endp_get_local_id(msk, addr); + if (remove_subflow) { + spin_lock_bh(&msk->pm.lock); + mptcp_pm_rm_subflow(msk, &list); + spin_unlock_bh(&msk->pm.lock); + } + + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + spin_lock_bh(&msk->pm.lock); + __mark_subflow_endp_available(msk, list.ids[0]); + spin_unlock_bh(&msk->pm.lock); + } + + if (msk->mpc_endpoint_id == entry->addr.id) + msk->mpc_endpoint_id = 0; + 
release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + +static int mptcp_nl_remove_id_zero_address(struct net *net, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + list.ids[list.nr++] = 0; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info msk_local; + + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) + goto next; + + mptcp_local_address((struct sock_common *)msk, &msk_local); + if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) + goto next; + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + mptcp_pm_rm_subflow(msk, &list); + __mark_subflow_endp_available(msk, 0); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + +/* Remove an MPTCP endpoint */ +int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct mptcp_pm_addr_entry addr, *entry; + unsigned int addr_max; + struct nlattr *attr; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + return ret; + + /* the zero id address is special: the first address used by the msk + * always gets such an id, so different subflows can have different zero + * id addresses. Additionally zero id is not accounted for in id_bitmap. + * Let's use an 'mptcp_rm_list' instead of the common remove code. 
+ */ + if (addr.addr.id == 0) + return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); + + spin_lock_bh(&pernet->lock); + entry = __lookup_addr_by_id(pernet, addr.addr.id); + if (!entry) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + addr_max = pernet->add_addr_signal_max; + WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); + } + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + addr_max = pernet->local_addr_max; + WRITE_ONCE(pernet->local_addr_max, addr_max - 1); + } + + pernet->addrs--; + list_del_rcu(&entry->list); + __clear_bit(entry->addr.id, pernet->id_bitmap); + spin_unlock_bh(&pernet->lock); + + mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); + synchronize_rcu(); + __mptcp_pm_release_addr_entry(entry); + + return ret; +} + +static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) +{ + struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, rm_list, list) { + if (slist.nr < MPTCP_RM_IDS_MAX && + mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) + slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); + + if (alist.nr < MPTCP_RM_IDS_MAX && + mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) + alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); + } + + spin_lock_bh(&msk->pm.lock); + if (alist.nr) { + msk->pm.add_addr_signaled -= alist.nr; + mptcp_pm_remove_addr(msk, &alist); + } + if (slist.nr) + mptcp_pm_rm_subflow(msk, &slist); + /* Reset counters: maybe some subflows have been removed before */ + bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + msk->pm.local_addr_used = 0; + spin_unlock_bh(&msk->pm.lock); +} + +static void mptcp_nl_flush_addrs_list(struct net *net, + struct list_head *rm_list) +{ + long s_slot = 0, 
s_num = 0; + struct mptcp_sock *msk; + + if (list_empty(rm_list)) + return; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + + if (!mptcp_pm_is_userspace(msk)) { + lock_sock(sk); + mptcp_pm_flush_addrs_and_subflows(msk, rm_list); + release_sock(sk); + } + + sock_put(sk); + cond_resched(); + } +} + +/* caller must ensure the RCU grace period is already elapsed */ +static void __flush_addrs(struct list_head *list) +{ + while (!list_empty(list)) { + struct mptcp_pm_addr_entry *cur; + + cur = list_entry(list->next, + struct mptcp_pm_addr_entry, list); + list_del_rcu(&cur->list); + __mptcp_pm_release_addr_entry(cur); + } +} + +static void __reset_counters(struct pm_nl_pernet *pernet) +{ + WRITE_ONCE(pernet->add_addr_signal_max, 0); + WRITE_ONCE(pernet->add_addr_accept_max, 0); + WRITE_ONCE(pernet->local_addr_max, 0); + pernet->addrs = 0; +} + +int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + LIST_HEAD(free_list); + + spin_lock_bh(&pernet->lock); + list_splice_init(&pernet->local_addr_list, &free_list); + __reset_counters(pernet); + pernet->next_id = 1; + bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + spin_unlock_bh(&pernet->lock); + mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); + synchronize_rcu(); + __flush_addrs(&free_list); + return 0; +} + +int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct mptcp_pm_addr_entry *entry; + int ret = -EINVAL; + + rcu_read_lock(); + entry = __lookup_addr_by_id(pernet, id); + if (entry) { + *addr = *entry; + ret = 0; + } + rcu_read_unlock(); + + return ret; +} + +int mptcp_pm_nl_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct net *net = sock_net(msg->sk); + struct mptcp_pm_addr_entry *entry; + struct pm_nl_pernet *pernet; + int 
id = cb->args[0]; + int i; + + pernet = pm_nl_get_pernet(net); + + rcu_read_lock(); + for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { + if (test_bit(i, pernet->id_bitmap)) { + entry = __lookup_addr_by_id(pernet, i); + if (!entry) + break; + + if (entry->addr.id <= id) + continue; + + if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) + break; + + id = entry->addr.id; + } + } + rcu_read_unlock(); + + cb->args[0] = id; + return msg->len; +} + +static int parse_limit(struct genl_info *info, int id, unsigned int *limit) +{ + struct nlattr *attr = info->attrs[id]; + + if (!attr) + return 0; + + *limit = nla_get_u32(attr); + if (*limit > MPTCP_PM_ADDR_MAX) { + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, + "limit greater than maximum (%u)", + MPTCP_PM_ADDR_MAX); + return -EINVAL; + } + return 0; +} + +int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + unsigned int rcv_addrs, subflows; + int ret; + + spin_lock_bh(&pernet->lock); + rcv_addrs = pernet->add_addr_accept_max; + ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); + if (ret) + goto unlock; + + subflows = pernet->subflows_max; + ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); + if (ret) + goto unlock; + + WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); + WRITE_ONCE(pernet->subflows_max, subflows); + +unlock: + spin_unlock_bh(&pernet->lock); + return ret; +} + +int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct sk_buff *msg; + void *reply; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, + MPTCP_PM_CMD_GET_LIMITS); + if (!reply) + goto fail; + + if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, + READ_ONCE(pernet->add_addr_accept_max))) + goto fail; + + if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, + 
READ_ONCE(pernet->subflows_max))) + goto fail; + + genlmsg_end(msg, reply); + return genlmsg_reply(msg, info); + +fail: + GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); + nlmsg_free(msg); + return -EMSGSIZE; +} + +static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_rm_subflow(msk, &list); + __mark_subflow_endp_available(msk, list.ids[0]); + mptcp_pm_create_subflow_or_signal_addr(msk); + spin_unlock_bh(&msk->pm.lock); +} + +static void mptcp_pm_nl_set_flags_all(struct net *net, + struct mptcp_pm_addr_entry *local, + u8 changed) +{ + u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); + u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow) + return; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) + goto next; + + lock_sock(sk); + if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) + mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup); + /* Subflows will only be recreated if the SUBFLOW flag is set */ + if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) + mptcp_pm_nl_fullmesh(msk, &local->addr); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } +} + +int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info) +{ + struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | + MPTCP_PM_ADDR_FLAG_FULLMESH; + struct net *net = genl_info_net(info); + struct mptcp_pm_addr_entry *entry; + struct pm_nl_pernet *pernet; + u8 lookup_by_id = 0; + + pernet = pm_nl_get_pernet(net); + + if (local->addr.family == AF_UNSPEC) { + 
lookup_by_id = 1; + if (!local->addr.id) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "missing address ID"); + return -EOPNOTSUPP; + } + } + + spin_lock_bh(&pernet->lock); + entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : + __lookup_addr(pernet, &local->addr); + if (!entry) { + spin_unlock_bh(&pernet->lock); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); + return -EINVAL; + } + if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | + MPTCP_PM_ADDR_FLAG_IMPLICIT))) { + spin_unlock_bh(&pernet->lock); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); + return -EINVAL; + } + + changed = (local->flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (local->flags & mask); + *local = *entry; + spin_unlock_bh(&pernet->lock); + + mptcp_pm_nl_set_flags_all(net, local, changed); + return 0; +} + +bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + + if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || + (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, + MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { + WRITE_ONCE(msk->pm.work_pending, false); + return false; + } + return true; +} + +/* Called under PM lock */ +void __mptcp_pm_kernel_worker(struct mptcp_sock *msk) +{ + struct mptcp_pm_data *pm = &msk->pm; + + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); + mptcp_pm_nl_add_addr_received(msk); + } + if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); + mptcp_pm_nl_fully_established(msk); + } + if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); + mptcp_pm_nl_subflow_established(msk); + } +} + +static int __net_init pm_nl_init_net(struct net *net) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); + + 
INIT_LIST_HEAD_RCU(&pernet->local_addr_list); + + /* Cit. 2 subflows ought to be enough for anybody. */ + pernet->subflows_max = 2; + pernet->next_id = 1; + pernet->stale_loss_cnt = 4; + spin_lock_init(&pernet->lock); + + /* No need to initialize other pernet fields, the struct is zeroed at + * allocation time. + */ + + return 0; +} + +static void __net_exit pm_nl_exit_net(struct list_head *net_list) +{ + struct net *net; + + list_for_each_entry(net, net_list, exit_list) { + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); + + /* net is removed from namespace list, can't race with + * other modifiers, also netns core already waited for a + * RCU grace period. + */ + __flush_addrs(&pernet->local_addr_list); + } +} + +static struct pernet_operations mptcp_pm_pernet_ops = { + .init = pm_nl_init_net, + .exit_batch = pm_nl_exit_net, + .id = &pm_nl_pernet_id, + .size = sizeof(struct pm_nl_pernet), +}; + +struct mptcp_pm_ops mptcp_pm_kernel = { + .name = "kernel", + .owner = THIS_MODULE, +}; + +void __init mptcp_pm_kernel_register(void) +{ + if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) + panic("Failed to register MPTCP PM pernet subsystem.\n"); + + mptcp_pm_register(&mptcp_pm_kernel); +} diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7868207c4e9d..50aaf259959a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -6,1197 +6,9 @@ #define pr_fmt(fmt) "MPTCP: " fmt -#include <linux/inet.h> -#include <linux/kernel.h> -#include <net/inet_common.h> -#include <net/netns/generic.h> -#include <net/mptcp.h> - #include "protocol.h" -#include "mib.h" #include "mptcp_pm_gen.h" -static int pm_nl_pernet_id; - -struct mptcp_pm_add_entry { - struct list_head list; - struct mptcp_addr_info addr; - u8 retrans_times; - struct timer_list add_timer; - struct mptcp_sock *sock; -}; - -struct pm_nl_pernet { - /* protects pernet updates */ - spinlock_t lock; - struct list_head local_addr_list; - unsigned int addrs; - unsigned int stale_loss_cnt; - 
unsigned int add_addr_signal_max; - unsigned int add_addr_accept_max; - unsigned int local_addr_max; - unsigned int subflows_max; - unsigned int next_id; - DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); -}; - -#define MPTCP_PM_ADDR_MAX 8 -#define ADD_ADDR_RETRANS_MAX 3 - -static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) -{ - return net_generic(net, pm_nl_pernet_id); -} - -static struct pm_nl_pernet * -pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) -{ - return pm_nl_get_pernet(sock_net((struct sock *)msk)); -} - -bool mptcp_addresses_equal(const struct mptcp_addr_info *a, - const struct mptcp_addr_info *b, bool use_port) -{ - bool addr_equals = false; - - if (a->family == b->family) { - if (a->family == AF_INET) - addr_equals = a->addr.s_addr == b->addr.s_addr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else - addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); - } else if (a->family == AF_INET) { - if (ipv6_addr_v4mapped(&b->addr6)) - addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; - } else if (b->family == AF_INET) { - if (ipv6_addr_v4mapped(&a->addr6)) - addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; -#endif - } - - if (!addr_equals) - return false; - if (!use_port) - return true; - - return a->port == b->port; -} - -void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) -{ - addr->family = skc->skc_family; - addr->port = htons(skc->skc_num); - if (addr->family == AF_INET) - addr->addr.s_addr = skc->skc_rcv_saddr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (addr->family == AF_INET6) - addr->addr6 = skc->skc_v6_rcv_saddr; -#endif -} - -static void remote_address(const struct sock_common *skc, - struct mptcp_addr_info *addr) -{ - addr->family = skc->skc_family; - addr->port = skc->skc_dport; - if (addr->family == AF_INET) - addr->addr.s_addr = skc->skc_daddr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (addr->family == AF_INET6) - addr->addr6 = skc->skc_v6_daddr; -#endif -} - -bool 
mptcp_lookup_subflow_by_saddr(const struct list_head *list, - const struct mptcp_addr_info *saddr) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_addr_info cur; - struct sock_common *skc; - - list_for_each_entry(subflow, list, node) { - skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); - - mptcp_local_address(skc, &cur); - if (mptcp_addresses_equal(&cur, saddr, saddr->port)) - return true; - } - - return false; -} - -static bool lookup_subflow_by_daddr(const struct list_head *list, - const struct mptcp_addr_info *daddr) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_addr_info cur; - - list_for_each_entry(subflow, list, node) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - if (!((1 << inet_sk_state_load(ssk)) & - (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) - continue; - - remote_address((struct sock_common *)ssk, &cur); - if (mptcp_addresses_equal(&cur, daddr, daddr->port)) - return true; - } - - return false; -} - -static bool -select_local_address(const struct pm_nl_pernet *pernet, - const struct mptcp_sock *msk, - struct mptcp_pm_local *new_local) -{ - struct mptcp_pm_addr_entry *entry; - bool found = false; - - msk_owned_by_me(msk); - - rcu_read_lock(); - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) - continue; - - if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) - continue; - - new_local->addr = entry->addr; - new_local->flags = entry->flags; - new_local->ifindex = entry->ifindex; - found = true; - break; - } - rcu_read_unlock(); - - return found; -} - -static bool -select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, - struct mptcp_pm_local *new_local) -{ - struct mptcp_pm_addr_entry *entry; - bool found = false; - - rcu_read_lock(); - /* do not keep any additional per socket state, just signal - * the address list in order. 
- * Note: removal from the local address list during the msk life-cycle - * can lead to additional addresses not being announced. - */ - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) - continue; - - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) - continue; - - new_local->addr = entry->addr; - new_local->flags = entry->flags; - new_local->ifindex = entry->ifindex; - found = true; - break; - } - rcu_read_unlock(); - - return found; -} - -unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) -{ - const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - return READ_ONCE(pernet->add_addr_signal_max); -} -EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); - -unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - return READ_ONCE(pernet->add_addr_accept_max); -} -EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); - -unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - return READ_ONCE(pernet->subflows_max); -} -EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); - -unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - return READ_ONCE(pernet->local_addr_max); -} -EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); - -bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - - if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || - (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, - MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { - WRITE_ONCE(msk->pm.work_pending, false); - return false; - } - return true; -} - -struct mptcp_pm_add_entry * -mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, - 
const struct mptcp_addr_info *addr) -{ - struct mptcp_pm_add_entry *entry; - - lockdep_assert_held(&msk->pm.lock); - - list_for_each_entry(entry, &msk->pm.anno_list, list) { - if (mptcp_addresses_equal(&entry->addr, addr, true)) - return entry; - } - - return NULL; -} - -bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) -{ - struct mptcp_pm_add_entry *entry; - struct mptcp_addr_info saddr; - bool ret = false; - - mptcp_local_address((struct sock_common *)sk, &saddr); - - spin_lock_bh(&msk->pm.lock); - list_for_each_entry(entry, &msk->pm.anno_list, list) { - if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { - ret = true; - goto out; - } - } - -out: - spin_unlock_bh(&msk->pm.lock); - return ret; -} - -static void mptcp_pm_add_timer(struct timer_list *timer) -{ - struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); - struct mptcp_sock *msk = entry->sock; - struct sock *sk = (struct sock *)msk; - - pr_debug("msk=%p\n", msk); - - if (!msk) - return; - - if (inet_sk_state_load(sk) == TCP_CLOSE) - return; - - if (!entry->addr.id) - return; - - if (mptcp_pm_should_add_signal_addr(msk)) { - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); - goto out; - } - - spin_lock_bh(&msk->pm.lock); - - if (!mptcp_pm_should_add_signal_addr(msk)) { - pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); - mptcp_pm_announce_addr(msk, &entry->addr, false); - mptcp_pm_add_addr_send_ack(msk); - entry->retrans_times++; - } - - if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) - sk_reset_timer(sk, timer, - jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); - - spin_unlock_bh(&msk->pm.lock); - - if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) - mptcp_pm_subflow_established(msk); - -out: - __sock_put(sk); -} - -struct mptcp_pm_add_entry * -mptcp_pm_del_add_timer(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr, bool check_id) -{ - struct mptcp_pm_add_entry *entry; - struct sock *sk = (struct sock *)msk; - struct 
timer_list *add_timer = NULL; - - spin_lock_bh(&msk->pm.lock); - entry = mptcp_lookup_anno_list_by_saddr(msk, addr); - if (entry && (!check_id || entry->addr.id == addr->id)) { - entry->retrans_times = ADD_ADDR_RETRANS_MAX; - add_timer = &entry->add_timer; - } - if (!check_id && entry) - list_del(&entry->list); - spin_unlock_bh(&msk->pm.lock); - - /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ - if (add_timer) - sk_stop_timer_sync(sk, add_timer); - - return entry; -} - -bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr) -{ - struct mptcp_pm_add_entry *add_entry = NULL; - struct sock *sk = (struct sock *)msk; - struct net *net = sock_net(sk); - - lockdep_assert_held(&msk->pm.lock); - - add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); - - if (add_entry) { - if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) - return false; - - sk_reset_timer(sk, &add_entry->add_timer, - jiffies + mptcp_get_add_addr_timeout(net)); - return true; - } - - add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); - if (!add_entry) - return false; - - list_add(&add_entry->list, &msk->pm.anno_list); - - add_entry->addr = *addr; - add_entry->sock = msk; - add_entry->retrans_times = 0; - - timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); - sk_reset_timer(sk, &add_entry->add_timer, - jiffies + mptcp_get_add_addr_timeout(net)); - - return true; -} - -void mptcp_pm_free_anno_list(struct mptcp_sock *msk) -{ - struct mptcp_pm_add_entry *entry, *tmp; - struct sock *sk = (struct sock *)msk; - LIST_HEAD(free_list); - - pr_debug("msk=%p\n", msk); - - spin_lock_bh(&msk->pm.lock); - list_splice_init(&msk->pm.anno_list, &free_list); - spin_unlock_bh(&msk->pm.lock); - - list_for_each_entry_safe(entry, tmp, &free_list, list) { - sk_stop_timer_sync(sk, &entry->add_timer); - kfree(entry); - } -} - -/* Fill all the remote addresses into the array addrs[], - * and return the array size. 
- */ -static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, - struct mptcp_addr_info *local, - bool fullmesh, - struct mptcp_addr_info *addrs) -{ - bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); - struct sock *sk = (struct sock *)msk, *ssk; - struct mptcp_subflow_context *subflow; - struct mptcp_addr_info remote = { 0 }; - unsigned int subflows_max; - int i = 0; - - subflows_max = mptcp_pm_get_subflows_max(msk); - remote_address((struct sock_common *)sk, &remote); - - /* Non-fullmesh endpoint, fill in the single entry - * corresponding to the primary MPC subflow remote address - */ - if (!fullmesh) { - if (deny_id0) - return 0; - - if (!mptcp_pm_addr_families_match(sk, local, &remote)) - return 0; - - msk->pm.subflows++; - addrs[i++] = remote; - } else { - DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); - - /* Forbid creation of new subflows matching existing - * ones, possibly already created by incoming ADD_ADDR - */ - bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); - mptcp_for_each_subflow(msk, subflow) - if (READ_ONCE(subflow->local_id) == local->id) - __set_bit(subflow->remote_id, unavail_id); - - mptcp_for_each_subflow(msk, subflow) { - ssk = mptcp_subflow_tcp_sock(subflow); - remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = READ_ONCE(subflow->remote_id); - if (deny_id0 && !addrs[i].id) - continue; - - if (test_bit(addrs[i].id, unavail_id)) - continue; - - if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) - continue; - - if (msk->pm.subflows < subflows_max) { - /* forbid creating multiple address towards - * this id - */ - __set_bit(addrs[i].id, unavail_id); - msk->pm.subflows++; - i++; - } - } - } - - return i; -} - -static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - bool prio, bool backup) -{ - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow; - - pr_debug("send ack for %s\n", - prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? 
"add_addr" : "rm_addr")); - - slow = lock_sock_fast(ssk); - if (prio) { - subflow->send_mp_prio = 1; - subflow->request_bkup = backup; - } - - __mptcp_subflow_send_ack(ssk); - unlock_sock_fast(ssk, slow); -} - -static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - bool prio, bool backup) -{ - spin_unlock_bh(&msk->pm.lock); - __mptcp_pm_send_ack(msk, subflow, prio, backup); - spin_lock_bh(&msk->pm.lock); -} - -static struct mptcp_pm_addr_entry * -__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) -{ - struct mptcp_pm_addr_entry *entry; - - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, - lockdep_is_held(&pernet->lock)) { - if (entry->addr.id == id) - return entry; - } - return NULL; -} - -static struct mptcp_pm_addr_entry * -__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) -{ - struct mptcp_pm_addr_entry *entry; - - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, - lockdep_is_held(&pernet->lock)) { - if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) - return entry; - } - return NULL; -} - -static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) -{ - struct sock *sk = (struct sock *)msk; - unsigned int add_addr_signal_max; - bool signal_and_subflow = false; - unsigned int local_addr_max; - struct pm_nl_pernet *pernet; - struct mptcp_pm_local local; - unsigned int subflows_max; - - pernet = pm_nl_get_pernet(sock_net(sk)); - - add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); - local_addr_max = mptcp_pm_get_local_addr_max(msk); - subflows_max = mptcp_pm_get_subflows_max(msk); - - /* do lazy endpoint usage accounting for the MPC subflows */ - if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); - struct mptcp_pm_addr_entry *entry; - struct mptcp_addr_info mpc_addr; - bool backup = false; - - 
mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); - rcu_read_lock(); - entry = __lookup_addr(pernet, &mpc_addr); - if (entry) { - __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); - msk->mpc_endpoint_id = entry->addr.id; - backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - } - rcu_read_unlock(); - - if (backup) - mptcp_pm_send_ack(msk, subflow, true, backup); - - msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); - } - - pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", - msk->pm.local_addr_used, local_addr_max, - msk->pm.add_addr_signaled, add_addr_signal_max, - msk->pm.subflows, subflows_max); - - /* check first for announce */ - if (msk->pm.add_addr_signaled < add_addr_signal_max) { - /* due to racing events on both ends we can reach here while - * previous add address is still running: if we invoke now - * mptcp_pm_announce_addr(), that will fail and the - * corresponding id will be marked as used. - * Instead let the PM machinery reschedule us when the - * current address announce will be completed. - */ - if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) - return; - - if (!select_signal_address(pernet, msk, &local)) - goto subflow; - - /* If the alloc fails, we are on memory pressure, not worth - * continuing, and trying to create subflows. 
- */ - if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) - return; - - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); - msk->pm.add_addr_signaled++; - - /* Special case for ID0: set the correct ID */ - if (local.addr.id == msk->mpc_endpoint_id) - local.addr.id = 0; - - mptcp_pm_announce_addr(msk, &local.addr, false); - mptcp_pm_nl_addr_send_ack(msk); - - if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) - signal_and_subflow = true; - } - -subflow: - /* check if should create a new subflow */ - while (msk->pm.local_addr_used < local_addr_max && - msk->pm.subflows < subflows_max) { - struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; - bool fullmesh; - int i, nr; - - if (signal_and_subflow) - signal_and_subflow = false; - else if (!select_local_address(pernet, msk, &local)) - break; - - fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); - - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); - - /* Special case for ID0: set the correct ID */ - if (local.addr.id == msk->mpc_endpoint_id) - local.addr.id = 0; - else /* local_addr_used is not decr for ID 0 */ - msk->pm.local_addr_used++; - - nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); - if (nr == 0) - continue; - - spin_unlock_bh(&msk->pm.lock); - for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &local, &addrs[i]); - spin_lock_bh(&msk->pm.lock); - } - mptcp_pm_nl_check_work_pending(msk); -} - -static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) -{ - mptcp_pm_create_subflow_or_signal_addr(msk); -} - -static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) -{ - mptcp_pm_create_subflow_or_signal_addr(msk); -} - -/* Fill all the local addresses into the array addrs[], - * and return the array size. 
- */ -static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, - struct mptcp_addr_info *remote, - struct mptcp_pm_local *locals) -{ - struct sock *sk = (struct sock *)msk; - struct mptcp_pm_addr_entry *entry; - struct mptcp_addr_info mpc_addr; - struct pm_nl_pernet *pernet; - unsigned int subflows_max; - int i = 0; - - pernet = pm_nl_get_pernet_from_msk(msk); - subflows_max = mptcp_pm_get_subflows_max(msk); - - mptcp_local_address((struct sock_common *)msk, &mpc_addr); - - rcu_read_lock(); - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) - continue; - - if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) - continue; - - if (msk->pm.subflows < subflows_max) { - locals[i].addr = entry->addr; - locals[i].flags = entry->flags; - locals[i].ifindex = entry->ifindex; - - /* Special case for ID0: set the correct ID */ - if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) - locals[i].addr.id = 0; - - msk->pm.subflows++; - i++; - } - } - rcu_read_unlock(); - - /* If the array is empty, fill in the single - * 'IPADDRANY' local address - */ - if (!i) { - memset(&locals[i], 0, sizeof(locals[i])); - locals[i].addr.family = -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - remote->family == AF_INET6 && - ipv6_addr_v4mapped(&remote->addr6) ? 
AF_INET : -#endif - remote->family; - - if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) - return 0; - - msk->pm.subflows++; - i++; - } - - return i; -} - -static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) -{ - struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; - struct sock *sk = (struct sock *)msk; - unsigned int add_addr_accept_max; - struct mptcp_addr_info remote; - unsigned int subflows_max; - bool sf_created = false; - int i, nr; - - add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); - subflows_max = mptcp_pm_get_subflows_max(msk); - - pr_debug("accepted %d:%d remote family %d\n", - msk->pm.add_addr_accepted, add_addr_accept_max, - msk->pm.remote.family); - - remote = msk->pm.remote; - mptcp_pm_announce_addr(msk, &remote, true); - mptcp_pm_nl_addr_send_ack(msk); - - if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) - return; - - /* pick id 0 port, if none is provided the remote address */ - if (!remote.port) - remote.port = sk->sk_dport; - - /* connect to the specified remote address, using whatever - * local address the routing configuration will pick. 
- */ - nr = fill_local_addresses_vec(msk, &remote, locals); - if (nr == 0) - return; - - spin_unlock_bh(&msk->pm.lock); - for (i = 0; i < nr; i++) - if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) - sf_created = true; - spin_lock_bh(&msk->pm.lock); - - if (sf_created) { - /* add_addr_accepted is not decr for ID 0 */ - if (remote.id) - msk->pm.add_addr_accepted++; - if (msk->pm.add_addr_accepted >= add_addr_accept_max || - msk->pm.subflows >= subflows_max) - WRITE_ONCE(msk->pm.accept_addr, false); - } -} - -bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *remote) -{ - struct mptcp_addr_info mpc_remote; - - remote_address((struct sock_common *)msk, &mpc_remote); - return mptcp_addresses_equal(&mpc_remote, remote, remote->port); -} - -void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow, *alt = NULL; - - msk_owned_by_me(msk); - lockdep_assert_held(&msk->pm.lock); - - if (!mptcp_pm_should_add_signal(msk) && - !mptcp_pm_should_rm_signal(msk)) - return; - - mptcp_for_each_subflow(msk, subflow) { - if (__mptcp_subflow_active(subflow)) { - if (!subflow->stale) { - mptcp_pm_send_ack(msk, subflow, false, false); - return; - } - - if (!alt) - alt = subflow; - } - } - - if (alt) - mptcp_pm_send_ack(msk, alt, false, false); -} - -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - struct mptcp_addr_info *rem, - u8 bkup) -{ - struct mptcp_subflow_context *subflow; - - pr_debug("bkup=%d\n", bkup); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - struct mptcp_addr_info local, remote; - - mptcp_local_address((struct sock_common *)ssk, &local); - if (!mptcp_addresses_equal(&local, addr, addr->port)) - continue; - - if (rem && rem->family != AF_UNSPEC) { - remote_address((struct sock_common *)ssk, &remote); - if (!mptcp_addresses_equal(&remote, rem, rem->port)) - continue; - } - - 
__mptcp_pm_send_ack(msk, subflow, true, bkup); - return 0; - } - - return -EINVAL; -} - -static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list, - enum linux_mptcp_mib_field rm_type) -{ - struct mptcp_subflow_context *subflow, *tmp; - struct sock *sk = (struct sock *)msk; - u8 i; - - pr_debug("%s rm_list_nr %d\n", - rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); - - msk_owned_by_me(msk); - - if (sk->sk_state == TCP_LISTEN) - return; - - if (!rm_list->nr) - return; - - if (list_empty(&msk->conn_list)) - return; - - for (i = 0; i < rm_list->nr; i++) { - u8 rm_id = rm_list->ids[i]; - bool removed = false; - - mptcp_for_each_subflow_safe(msk, subflow, tmp) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - u8 remote_id = READ_ONCE(subflow->remote_id); - int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow_get_local_id(subflow); - - if ((1 << inet_sk_state_load(ssk)) & - (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE)) - continue; - if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) - continue; - if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) - continue; - - pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", - rm_type == MPTCP_MIB_RMADDR ? 
"address" : "subflow", - i, rm_id, id, remote_id, msk->mpc_endpoint_id); - spin_unlock_bh(&msk->pm.lock); - mptcp_subflow_shutdown(sk, ssk, how); - removed |= subflow->request_join; - - /* the following takes care of updating the subflows counter */ - mptcp_close_ssk(sk, ssk, subflow); - spin_lock_bh(&msk->pm.lock); - - if (rm_type == MPTCP_MIB_RMSUBFLOW) - __MPTCP_INC_STATS(sock_net(sk), rm_type); - } - - if (rm_type == MPTCP_MIB_RMADDR) - __MPTCP_INC_STATS(sock_net(sk), rm_type); - - if (!removed) - continue; - - if (!mptcp_pm_is_kernel(msk)) - continue; - - if (rm_type == MPTCP_MIB_RMADDR && rm_id && - !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { - /* Note: if the subflow has been closed before, this - * add_addr_accepted counter will not be decremented. - */ - if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) - WRITE_ONCE(msk->pm.accept_addr, true); - } - } -} - -static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) -{ - mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); -} - -static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list) -{ - mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); -} - -void mptcp_pm_nl_work(struct mptcp_sock *msk) -{ - struct mptcp_pm_data *pm = &msk->pm; - - msk_owned_by_me(msk); - - if (!(pm->status & MPTCP_PM_WORK_MASK)) - return; - - spin_lock_bh(&msk->pm.lock); - - pr_debug("msk=%p status=%x\n", msk, pm->status); - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); - mptcp_pm_nl_add_addr_received(msk); - } - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); - mptcp_pm_nl_addr_send_ack(msk); - } - if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); - mptcp_pm_nl_rm_addr_received(msk); - } - if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { - pm->status &= 
~BIT(MPTCP_PM_ESTABLISHED); - mptcp_pm_nl_fully_established(msk); - } - if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { - pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); - mptcp_pm_nl_subflow_established(msk); - } - - spin_unlock_bh(&msk->pm.lock); -} - -static bool address_use_port(struct mptcp_pm_addr_entry *entry) -{ - return (entry->flags & - (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == - MPTCP_PM_ADDR_FLAG_SIGNAL; -} - -/* caller must ensure the RCU grace period is already elapsed */ -static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) -{ - if (entry->lsk) - sock_release(entry->lsk); - kfree(entry); -} - -static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry, - bool needs_id, bool replace) -{ - struct mptcp_pm_addr_entry *cur, *del_entry = NULL; - unsigned int addr_max; - int ret = -EINVAL; - - spin_lock_bh(&pernet->lock); - /* to keep the code simple, don't do IDR-like allocation for address ID, - * just bail when we exceed limits - */ - if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) - pernet->next_id = 1; - if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { - ret = -ERANGE; - goto out; - } - if (test_bit(entry->addr.id, pernet->id_bitmap)) { - ret = -EBUSY; - goto out; - } - - /* do not insert duplicate address, differentiate on port only - * singled addresses - */ - if (!address_use_port(entry)) - entry->addr.port = 0; - list_for_each_entry(cur, &pernet->local_addr_list, list) { - if (mptcp_addresses_equal(&cur->addr, &entry->addr, - cur->addr.port || entry->addr.port)) { - /* allow replacing the exiting endpoint only if such - * endpoint is an implicit one and the user-space - * did not provide an endpoint id - */ - if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { - ret = -EEXIST; - goto out; - } - if (entry->addr.id) - goto out; - - /* allow callers that only need to look up the local - * addr's id to skip replacement. 
This allows them to - * avoid calling synchronize_rcu in the packet recv - * path. - */ - if (!replace) { - kfree(entry); - ret = cur->addr.id; - goto out; - } - - pernet->addrs--; - entry->addr.id = cur->addr.id; - list_del_rcu(&cur->list); - del_entry = cur; - break; - } - } - - if (!entry->addr.id && needs_id) { -find_next: - entry->addr.id = find_next_zero_bit(pernet->id_bitmap, - MPTCP_PM_MAX_ADDR_ID + 1, - pernet->next_id); - if (!entry->addr.id && pernet->next_id != 1) { - pernet->next_id = 1; - goto find_next; - } - } - - if (!entry->addr.id && needs_id) - goto out; - - __set_bit(entry->addr.id, pernet->id_bitmap); - if (entry->addr.id > pernet->next_id) - pernet->next_id = entry->addr.id; - - if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { - addr_max = pernet->add_addr_signal_max; - WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); - } - if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { - addr_max = pernet->local_addr_max; - WRITE_ONCE(pernet->local_addr_max, addr_max + 1); - } - - pernet->addrs++; - if (!entry->addr.port) - list_add_tail_rcu(&entry->list, &pernet->local_addr_list); - else - list_add_rcu(&entry->list, &pernet->local_addr_list); - ret = entry->addr.id; - -out: - spin_unlock_bh(&pernet->lock); - - /* just replaced an existing entry, free it */ - if (del_entry) { - synchronize_rcu(); - __mptcp_pm_release_addr_entry(del_entry); - } - return ret; -} - -static struct lock_class_key mptcp_slock_keys[2]; -static struct lock_class_key mptcp_keys[2]; - -static int mptcp_pm_nl_create_listen_socket(struct sock *sk, - struct mptcp_pm_addr_entry *entry) -{ - bool is_ipv6 = sk->sk_family == AF_INET6; - int addrlen = sizeof(struct sockaddr_in); - struct sockaddr_storage addr; - struct sock *newsk, *ssk; - int backlog = 1024; - int err; - - err = sock_create_kern(sock_net(sk), entry->addr.family, - SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); - if (err) - return err; - - newsk = entry->lsk->sk; - if (!newsk) - return -EINVAL; - - /* The subflow socket 
lock is acquired in a nested to the msk one - * in several places, even by the TCP stack, and this msk is a kernel - * socket: lockdep complains. Instead of propagating the _nested - * modifiers in several places, re-init the lock class for the msk - * socket to an mptcp specific one. - */ - sock_lock_init_class_and_name(newsk, - is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", - &mptcp_slock_keys[is_ipv6], - is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", - &mptcp_keys[is_ipv6]); - - lock_sock(newsk); - ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); - release_sock(newsk); - if (IS_ERR(ssk)) - return PTR_ERR(ssk); - - mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (entry->addr.family == AF_INET6) - addrlen = sizeof(struct sockaddr_in6); -#endif - if (ssk->sk_family == AF_INET) - err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (ssk->sk_family == AF_INET6) - err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); -#endif - if (err) - return err; - - /* We don't use mptcp_set_state() here because it needs to be called - * under the msk socket lock. For the moment, that will not bring - * anything more than only calling inet_sk_state_store(), because the - * old status is known (TCP_CLOSE). - */ - inet_sk_state_store(newsk, TCP_LISTEN); - lock_sock(ssk); - WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); - err = __inet_listen_sk(ssk, backlog); - if (!err) - mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); - release_sock(ssk); - return err; -} - -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) -{ - struct mptcp_pm_addr_entry *entry; - struct pm_nl_pernet *pernet; - int ret; - - pernet = pm_nl_get_pernet_from_msk(msk); - - rcu_read_lock(); - entry = __lookup_addr(pernet, skc); - ret = entry ? 
entry->addr.id : -1; - rcu_read_unlock(); - if (ret >= 0) - return ret; - - /* address not found, add to local list */ - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) - return -ENOMEM; - - entry->addr = *skc; - entry->addr.id = 0; - entry->addr.port = 0; - entry->ifindex = 0; - entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; - entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); - if (ret < 0) - kfree(entry); - - return ret; -} - -bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - struct mptcp_pm_addr_entry *entry; - bool backup; - - rcu_read_lock(); - entry = __lookup_addr(pernet, skc); - backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - rcu_read_unlock(); - - return backup; -} - #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 @@ -1207,43 +19,6 @@ static const struct genl_multicast_group mptcp_pm_mcgrps[] = { }, }; -void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) -{ - struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); - struct sock *sk = (struct sock *)msk; - unsigned int active_max_loss_cnt; - struct net *net = sock_net(sk); - unsigned int stale_loss_cnt; - bool slow; - - stale_loss_cnt = mptcp_stale_loss_cnt(net); - if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) - return; - - /* look for another available subflow not in loss state */ - active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); - mptcp_for_each_subflow(msk, iter) { - if (iter != subflow && mptcp_subflow_active(iter) && - iter->stale_count < active_max_loss_cnt) { - /* we have some alternatives, try to mark this subflow as idle ...*/ - slow = lock_sock_fast(ssk); - if (!tcp_rtx_and_write_queues_empty(ssk)) { - subflow->stale = 1; - __mptcp_retransmit_pending_data(sk); - MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE); - } - 
unlock_sock_fast(ssk, slow); - - /* always try to push the pending data regardless of re-injections: - * we can possibly use backup subflows now, and subflow selection - * is cheap under the msk socket lock - */ - __mptcp_push_pending(sk, 0); - return; - } - } -} - static int mptcp_pm_family_to_addr(int family) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -1352,386 +127,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, return 0; } -static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) -{ - return pm_nl_get_pernet(genl_info_net(info)); -} - -static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, - struct mptcp_addr_info *addr) -{ - struct mptcp_sock *msk; - long s_slot = 0, s_num = 0; - - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; - struct mptcp_addr_info mpc_addr; - - if (!READ_ONCE(msk->fully_established) || - mptcp_pm_is_userspace(msk)) - goto next; - - /* if the endp linked to the init sf is re-added with a != ID */ - mptcp_local_address((struct sock_common *)msk, &mpc_addr); - - lock_sock(sk); - spin_lock_bh(&msk->pm.lock); - if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) - msk->mpc_endpoint_id = addr->id; - mptcp_pm_create_subflow_or_signal_addr(msk); - spin_unlock_bh(&msk->pm.lock); - release_sock(sk); - -next: - sock_put(sk); - cond_resched(); - } - - return 0; -} - -static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, - struct genl_info *info) -{ - struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; - - if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, - mptcp_pm_address_nl_policy, info->extack) && - tb[MPTCP_PM_ADDR_ATTR_ID]) - return true; - return false; -} - -int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - struct mptcp_pm_addr_entry addr, *entry; - int ret; - - ret = 
mptcp_pm_parse_entry(attr, info, true, &addr); - if (ret < 0) - return ret; - - if (addr.addr.port && !address_use_port(&addr)) { - GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); - return -EINVAL; - } - - if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && - addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { - GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh"); - return -EINVAL; - } - - if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { - GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint"); - return -EINVAL; - } - - entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); - if (!entry) { - GENL_SET_ERR_MSG(info, "can't allocate addr"); - return -ENOMEM; - } - - *entry = addr; - if (entry->addr.port) { - ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); - if (ret) { - GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); - goto out_free; - } - } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, - !mptcp_pm_has_addr_attr_id(attr, info), - true); - if (ret < 0) { - GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); - goto out_free; - } - - mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); - return 0; - -out_free: - __mptcp_pm_release_addr_entry(entry); - return ret; -} - -bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr) -{ - struct mptcp_pm_add_entry *entry; - - entry = mptcp_pm_del_add_timer(msk, addr, false); - if (entry) { - kfree(entry); - return true; - } - - return false; -} - -static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr) -{ - return msk->mpc_endpoint_id == addr->id ? 
0 : addr->id; -} - -static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr, - bool force) -{ - struct mptcp_rm_list list = { .nr = 0 }; - bool ret; - - list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); - - ret = mptcp_remove_anno_list_by_saddr(msk, addr); - if (ret || force) { - spin_lock_bh(&msk->pm.lock); - if (ret) { - __set_bit(addr->id, msk->pm.id_avail_bitmap); - msk->pm.add_addr_signaled--; - } - mptcp_pm_remove_addr(msk, &list); - spin_unlock_bh(&msk->pm.lock); - } - return ret; -} - -static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) -{ - /* If it was marked as used, and not ID 0, decrement local_addr_used */ - if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && - id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) - msk->pm.local_addr_used--; -} - -static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, - const struct mptcp_pm_addr_entry *entry) -{ - const struct mptcp_addr_info *addr = &entry->addr; - struct mptcp_rm_list list = { .nr = 1 }; - long s_slot = 0, s_num = 0; - struct mptcp_sock *msk; - - pr_debug("remove_id=%d\n", addr->id); - - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; - bool remove_subflow; - - if (mptcp_pm_is_userspace(msk)) - goto next; - - lock_sock(sk); - remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); - mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && - !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); - - list.ids[0] = mptcp_endp_get_local_id(msk, addr); - if (remove_subflow) { - spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, &list); - spin_unlock_bh(&msk->pm.lock); - } - - if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { - spin_lock_bh(&msk->pm.lock); - __mark_subflow_endp_available(msk, list.ids[0]); - spin_unlock_bh(&msk->pm.lock); - } - - if (msk->mpc_endpoint_id == entry->addr.id) - msk->mpc_endpoint_id 
= 0; - release_sock(sk); - -next: - sock_put(sk); - cond_resched(); - } - - return 0; -} - -static int mptcp_nl_remove_id_zero_address(struct net *net, - struct mptcp_addr_info *addr) -{ - struct mptcp_rm_list list = { .nr = 0 }; - long s_slot = 0, s_num = 0; - struct mptcp_sock *msk; - - list.ids[list.nr++] = 0; - - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; - struct mptcp_addr_info msk_local; - - if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) - goto next; - - mptcp_local_address((struct sock_common *)msk, &msk_local); - if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) - goto next; - - lock_sock(sk); - spin_lock_bh(&msk->pm.lock); - mptcp_pm_remove_addr(msk, &list); - mptcp_pm_nl_rm_subflow_received(msk, &list); - __mark_subflow_endp_available(msk, 0); - spin_unlock_bh(&msk->pm.lock); - release_sock(sk); - -next: - sock_put(sk); - cond_resched(); - } - - return 0; -} - -int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - struct mptcp_pm_addr_entry addr, *entry; - unsigned int addr_max; - int ret; - - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - return ret; - - /* the zero id address is special: the first address used by the msk - * always gets such an id, so different subflows can have different zero - * id addresses. Additionally zero id is not accounted for in id_bitmap. - * Let's use an 'mptcp_rm_list' instead of the common remove code. 
- */ - if (addr.addr.id == 0) - return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); - - spin_lock_bh(&pernet->lock); - entry = __lookup_addr_by_id(pernet, addr.addr.id); - if (!entry) { - GENL_SET_ERR_MSG(info, "address not found"); - spin_unlock_bh(&pernet->lock); - return -EINVAL; - } - if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { - addr_max = pernet->add_addr_signal_max; - WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); - } - if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { - addr_max = pernet->local_addr_max; - WRITE_ONCE(pernet->local_addr_max, addr_max - 1); - } - - pernet->addrs--; - list_del_rcu(&entry->list); - __clear_bit(entry->addr.id, pernet->id_bitmap); - spin_unlock_bh(&pernet->lock); - - mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); - synchronize_rcu(); - __mptcp_pm_release_addr_entry(entry); - - return ret; -} - -static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) -{ - struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; - struct mptcp_pm_addr_entry *entry; - - list_for_each_entry(entry, rm_list, list) { - if (slist.nr < MPTCP_RM_IDS_MAX && - mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) - slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); - - if (alist.nr < MPTCP_RM_IDS_MAX && - mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) - alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); - } - - spin_lock_bh(&msk->pm.lock); - if (alist.nr) { - msk->pm.add_addr_signaled -= alist.nr; - mptcp_pm_remove_addr(msk, &alist); - } - if (slist.nr) - mptcp_pm_nl_rm_subflow_received(msk, &slist); - /* Reset counters: maybe some subflows have been removed before */ - bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); - msk->pm.local_addr_used = 0; - spin_unlock_bh(&msk->pm.lock); -} - -static void mptcp_nl_flush_addrs_list(struct net *net, - struct list_head *rm_list) -{ - long s_slot = 0, s_num = 
0; - struct mptcp_sock *msk; - - if (list_empty(rm_list)) - return; - - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; - - if (!mptcp_pm_is_userspace(msk)) { - lock_sock(sk); - mptcp_pm_flush_addrs_and_subflows(msk, rm_list); - release_sock(sk); - } - - sock_put(sk); - cond_resched(); - } -} - -/* caller must ensure the RCU grace period is already elapsed */ -static void __flush_addrs(struct list_head *list) -{ - while (!list_empty(list)) { - struct mptcp_pm_addr_entry *cur; - - cur = list_entry(list->next, - struct mptcp_pm_addr_entry, list); - list_del_rcu(&cur->list); - __mptcp_pm_release_addr_entry(cur); - } -} - -static void __reset_counters(struct pm_nl_pernet *pernet) -{ - WRITE_ONCE(pernet->add_addr_signal_max, 0); - WRITE_ONCE(pernet->add_addr_accept_max, 0); - WRITE_ONCE(pernet->local_addr_max, 0); - pernet->addrs = 0; -} - -int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - LIST_HEAD(free_list); - - spin_lock_bh(&pernet->lock); - list_splice_init(&pernet->local_addr_list, &free_list); - __reset_counters(pernet); - pernet->next_id = 1; - bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); - spin_unlock_bh(&pernet->lock); - mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); - synchronize_rcu(); - __flush_addrs(&free_list); - return 0; -} - -int mptcp_nl_fill_addr(struct sk_buff *skb, - struct mptcp_pm_addr_entry *entry) +static int mptcp_nl_fill_addr(struct sk_buff *skb, + struct mptcp_pm_addr_entry *entry) { struct mptcp_addr_info *addr = &entry->addr; struct nlattr *attr; @@ -1769,15 +166,26 @@ nla_put_failure: return -EMSGSIZE; } -int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) +static int mptcp_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - struct pm_nl_pernet *pernet = 
genl_info_pm_nl(info); - struct mptcp_pm_addr_entry addr, *entry; + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_get_addr(id, addr, info); + return mptcp_pm_nl_get_addr(id, addr, info); +} + +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct mptcp_pm_addr_entry addr; + struct nlattr *attr; struct sk_buff *msg; void *reply; int ret; + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; @@ -1794,258 +202,83 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) goto fail; } - rcu_read_lock(); - entry = __lookup_addr_by_id(pernet, addr.addr.id); - if (!entry) { - GENL_SET_ERR_MSG(info, "address not found"); - ret = -EINVAL; - goto unlock_fail; + ret = mptcp_pm_get_addr(addr.addr.id, &addr, info); + if (ret) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); + goto fail; } - ret = mptcp_nl_fill_addr(msg, entry); + ret = mptcp_nl_fill_addr(msg, &addr); if (ret) - goto unlock_fail; + goto fail; genlmsg_end(msg, reply); ret = genlmsg_reply(msg, info); - rcu_read_unlock(); return ret; -unlock_fail: - rcu_read_unlock(); - fail: nlmsg_free(msg); return ret; } -int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_genl_fill_addr(struct sk_buff *msg, + struct netlink_callback *cb, + struct mptcp_pm_addr_entry *entry) { - return mptcp_pm_get_addr(skb, info); -} - -int mptcp_pm_nl_dump_addr(struct sk_buff *msg, - struct netlink_callback *cb) -{ - struct net *net = sock_net(msg->sk); - struct mptcp_pm_addr_entry *entry; - struct pm_nl_pernet *pernet; - int id = cb->args[0]; void *hdr; - int i; - - pernet = pm_nl_get_pernet(net); - - rcu_read_lock(); - for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { - if (test_bit(i, pernet->id_bitmap)) { - entry = __lookup_addr_by_id(pernet, i); - if (!entry) - break; - - if 
(entry->addr.id <= id) - continue; - - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, &mptcp_genl_family, - NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); - if (!hdr) - break; - - if (mptcp_nl_fill_addr(msg, entry) < 0) { - genlmsg_cancel(msg, hdr); - break; - } - - id = entry->addr.id; - genlmsg_end(msg, hdr); - } - } - rcu_read_unlock(); - - cb->args[0] = id; - return msg->len; -} - -int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) -{ - return mptcp_pm_dump_addr(msg, cb); -} -static int parse_limit(struct genl_info *info, int id, unsigned int *limit) -{ - struct nlattr *attr = info->attrs[id]; - - if (!attr) - return 0; + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &mptcp_genl_family, + NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); + if (!hdr) + return -EINVAL; - *limit = nla_get_u32(attr); - if (*limit > MPTCP_PM_ADDR_MAX) { - GENL_SET_ERR_MSG(info, "limit greater than maximum"); + if (mptcp_nl_fill_addr(msg, entry) < 0) { + genlmsg_cancel(msg, hdr); return -EINVAL; } - return 0; -} - -int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - unsigned int rcv_addrs, subflows; - int ret; - - spin_lock_bh(&pernet->lock); - rcv_addrs = pernet->add_addr_accept_max; - ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); - if (ret) - goto unlock; - - subflows = pernet->subflows_max; - ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); - if (ret) - goto unlock; - - WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); - WRITE_ONCE(pernet->subflows_max, subflows); -unlock: - spin_unlock_bh(&pernet->lock); - return ret; + genlmsg_end(msg, hdr); + return 0; } -int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) +static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - struct sk_buff *msg; - void 
*reply; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + const struct genl_info *info = genl_info_dump(cb); - reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, - MPTCP_PM_CMD_GET_LIMITS); - if (!reply) - goto fail; - - if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, - READ_ONCE(pernet->add_addr_accept_max))) - goto fail; - - if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, - READ_ONCE(pernet->subflows_max))) - goto fail; - - genlmsg_end(msg, reply); - return genlmsg_reply(msg, info); - -fail: - GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); - nlmsg_free(msg); - return -EMSGSIZE; + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_dump_addr(msg, cb); + return mptcp_pm_nl_dump_addr(msg, cb); } -static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, - struct mptcp_addr_info *addr) +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) { - struct mptcp_rm_list list = { .nr = 0 }; - - list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); - - spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, &list); - __mark_subflow_endp_available(msk, list.ids[0]); - mptcp_pm_create_subflow_or_signal_addr(msk); - spin_unlock_bh(&msk->pm.lock); + return mptcp_pm_dump_addr(msg, cb); } -static int mptcp_nl_set_flags(struct net *net, - struct mptcp_addr_info *addr, - u8 bkup, u8 changed) +static int mptcp_pm_set_flags(struct genl_info *info) { - long s_slot = 0, s_num = 0; - struct mptcp_sock *msk; + struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; + struct nlattr *attr_loc; int ret = -EINVAL; - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { - struct sock *sk = (struct sock *)msk; - - if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) - goto next; - - lock_sock(sk); - if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) - ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup); - if (changed & 
MPTCP_PM_ADDR_FLAG_FULLMESH) - mptcp_pm_nl_fullmesh(msk, addr); - release_sock(sk); - -next: - sock_put(sk); - cond_resched(); - } - - return ret; -} - -int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) -{ - struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; - u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | - MPTCP_PM_ADDR_FLAG_FULLMESH; - struct net *net = sock_net(skb->sk); - struct mptcp_pm_addr_entry *entry; - struct pm_nl_pernet *pernet; - u8 lookup_by_id = 0; - u8 bkup = 0; - int ret; - - pernet = pm_nl_get_pernet(net); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) + return ret; - ret = mptcp_pm_parse_entry(attr, info, false, &addr); + attr_loc = info->attrs[MPTCP_PM_ATTR_ADDR]; + ret = mptcp_pm_parse_entry(attr_loc, info, false, &loc); if (ret < 0) return ret; - if (addr.addr.family == AF_UNSPEC) { - lookup_by_id = 1; - if (!addr.addr.id) { - GENL_SET_ERR_MSG(info, "missing required inputs"); - return -EOPNOTSUPP; - } - } - - if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) - bkup = 1; - - spin_lock_bh(&pernet->lock); - entry = lookup_by_id ? 
__lookup_addr_by_id(pernet, addr.addr.id) : - __lookup_addr(pernet, &addr.addr); - if (!entry) { - spin_unlock_bh(&pernet->lock); - GENL_SET_ERR_MSG(info, "address not found"); - return -EINVAL; - } - if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | - MPTCP_PM_ADDR_FLAG_IMPLICIT))) { - spin_unlock_bh(&pernet->lock); - GENL_SET_ERR_MSG(info, "invalid addr flags"); - return -EINVAL; - } - - changed = (addr.flags ^ entry->flags) & mask; - entry->flags = (entry->flags & ~mask) | (addr.flags & mask); - addr = *entry; - spin_unlock_bh(&pernet->lock); - - mptcp_nl_set_flags(net, &addr.addr, bkup, changed); - return 0; + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_set_flags(&loc, info); + return mptcp_pm_nl_set_flags(&loc, info); } int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) { - return mptcp_pm_set_flags(skb, info); + return mptcp_pm_set_flags(info); } static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) @@ -2078,9 +311,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: { - const struct ipv6_pinfo *np = inet6_sk(ssk); - - if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) + if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &issk->pinet6->saddr)) return -EMSGSIZE; if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr)) return -EMSGSIZE; @@ -2307,9 +538,7 @@ void mptcp_event_pm_listener(const struct sock *ssk, break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: { - const struct ipv6_pinfo *np = inet6_sk(ssk); - - if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) + if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &issk->pinet6->saddr)) goto nla_put_failure; break; } @@ -2397,52 +626,8 @@ struct genl_family mptcp_genl_family __ro_after_init = { .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), }; -static int __net_init pm_nl_init_net(struct net 
*net) -{ - struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); - - INIT_LIST_HEAD_RCU(&pernet->local_addr_list); - - /* Cit. 2 subflows ought to be enough for anybody. */ - pernet->subflows_max = 2; - pernet->next_id = 1; - pernet->stale_loss_cnt = 4; - spin_lock_init(&pernet->lock); - - /* No need to initialize other pernet fields, the struct is zeroed at - * allocation time. - */ - - return 0; -} - -static void __net_exit pm_nl_exit_net(struct list_head *net_list) -{ - struct net *net; - - list_for_each_entry(net, net_list, exit_list) { - struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); - - /* net is removed from namespace list, can't race with - * other modifiers, also netns core already waited for a - * RCU grace period. - */ - __flush_addrs(&pernet->local_addr_list); - } -} - -static struct pernet_operations mptcp_pm_pernet_ops = { - .init = pm_nl_init_net, - .exit_batch = pm_nl_exit_net, - .id = &pm_nl_pernet_id, - .size = sizeof(struct pm_nl_pernet), -}; - void __init mptcp_pm_nl_init(void) { - if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) - panic("Failed to register MPTCP PM pernet subsystem.\n"); - if (genl_register_family(&mptcp_genl_family)) panic("Failed to register MPTCP PM netlink family\n"); } diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index a3d477059b11..2cb62f026b1f 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -12,15 +12,12 @@ list_for_each_entry(__entry, \ &((__msk)->pm.userspace_pm_local_addr_list), list) -void mptcp_free_local_addr_list(struct mptcp_sock *msk) +void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk) { struct mptcp_pm_addr_entry *entry, *tmp; struct sock *sk = (struct sock *)msk; LIST_HEAD(free_list); - if (!mptcp_pm_is_userspace(msk)) - return; - spin_lock_bh(&msk->pm.lock); list_splice_init(&msk->pm.userspace_pm_local_addr_list, &free_list); spin_unlock_bh(&msk->pm.lock); @@ -48,7 +45,6 @@ static int mptcp_userspace_pm_append_new_local_addr(struct 
mptcp_sock *msk, bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); - struct mptcp_pm_addr_entry *match = NULL; struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *e; bool addr_match = false; @@ -63,26 +59,21 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); - if (addr_match && id_match) { - match = e; - break; - } else if (addr_match || id_match) { + if (addr_match || id_match) break; - } __set_bit(e->addr.id, id_bitmap); } - if (!match && !addr_match && !id_match) { + if (!addr_match && !id_match) { /* Memory for the entry is allocated from the * sock option buffer. */ - e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC); + e = sock_kmemdup(sk, entry, sizeof(*entry), GFP_ATOMIC); if (!e) { ret = -ENOMEM; goto append_err; } - *e = *entry; if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -90,7 +81,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list); msk->pm.local_addr_used++; ret = e->addr.id; - } else if (match) { + } else if (addr_match && id_match) { ret = entry->addr.id; } @@ -136,27 +127,22 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) } int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, - struct mptcp_addr_info *skc) + struct mptcp_pm_addr_entry *skc) { - struct mptcp_pm_addr_entry *entry = NULL, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; + struct mptcp_pm_addr_entry *entry; spin_lock_bh(&msk->pm.lock); - entry = mptcp_userspace_pm_lookup_addr(msk, skc); + entry = mptcp_userspace_pm_lookup_addr(msk, &skc->addr); spin_unlock_bh(&msk->pm.lock); if (entry) return entry->addr.id; - memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); - 
new_entry.addr = *skc; - new_entry.addr.id = 0; - new_entry.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; - - if (new_entry.addr.port == msk_sport) - new_entry.addr.port = 0; + if (skc->addr.port == msk_sport) + skc->addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); + return mptcp_userspace_pm_append_new_local_addr(msk, skc, true); } bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, @@ -175,14 +161,13 @@ bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *info) { - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct mptcp_sock *msk; + struct nlattr *token; - if (!token) { - GENL_SET_ERR_MSG(info, "missing required token"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_TOKEN)) return NULL; - } + token = info->attrs[MPTCP_PM_ATTR_TOKEN]; msk = mptcp_token_get_sock(genl_info_net(info), nla_get_u32(token)); if (!msk) { NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); @@ -190,7 +175,8 @@ static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *in } if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + NL_SET_ERR_MSG_ATTR(info->extack, token, + "userspace PM not selected"); sock_put((struct sock *)msk); return NULL; } @@ -200,16 +186,14 @@ static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *in int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry addr_val; struct mptcp_sock *msk; + struct nlattr *addr; int err = -EINVAL; struct sock *sk; - if (!addr) { - GENL_SET_ERR_MSG(info, "missing required address"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) return err; - } msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -217,21 +201,27 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) sk = 
(struct sock *)msk; + addr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(addr, info, true, &addr_val); - if (err < 0) { - GENL_SET_ERR_MSG(info, "error parsing local address"); + if (err < 0) + goto announce_err; + + if (addr_val.addr.id == 0) { + NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr id"); + err = -EINVAL; goto announce_err; } - if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - GENL_SET_ERR_MSG(info, "invalid addr id or flags"); + if (!(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr flags"); err = -EINVAL; goto announce_err; } err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { - GENL_SET_ERR_MSG(info, "did not match address and id"); + NL_SET_ERR_MSG_ATTR(info->extack, addr, + "did not match address and id"); goto announce_err; } @@ -241,7 +231,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &addr_val.addr, false); - mptcp_pm_nl_addr_send_ack(msk); + mptcp_pm_addr_send_ack(msk); } spin_unlock_bh(&msk->pm.lock); @@ -253,8 +243,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) return err; } -static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, - struct genl_info *info) +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk) { struct mptcp_rm_list list = { .nr = 0 }; struct mptcp_subflow_context *subflow; @@ -269,10 +258,8 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, break; } } - if (!has_id_0) { - GENL_SET_ERR_MSG(info, "address with id 0 not found"); + if (!has_id_0) goto remove_err; - } list.ids[list.nr++] = 0; @@ -309,18 +296,17 @@ void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { - 
struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; struct mptcp_pm_addr_entry *match; struct mptcp_sock *msk; + struct nlattr *id; int err = -EINVAL; struct sock *sk; u8 id_val; - if (!id) { - GENL_SET_ERR_MSG(info, "missing required ID"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_LOC_ID)) return err; - } + id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; id_val = nla_get_u8(id); msk = mptcp_userspace_pm_get_sock(info); @@ -330,7 +316,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) sk = (struct sock *)msk; if (id_val == 0) { - err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + err = mptcp_userspace_pm_remove_id_zero_address(msk); goto out; } @@ -339,7 +325,6 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&msk->pm.lock); match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val); if (!match) { - GENL_SET_ERR_MSG(info, "address with specified id not found"); spin_unlock_bh(&msk->pm.lock); release_sock(sk); goto out; @@ -356,25 +341,28 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) err = 0; out: + if (err) + NL_SET_ERR_MSG_ATTR_FMT(info->extack, id, + "address with id %u not found", + id_val); + sock_put(sk); return err; } int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry entry = { 0 }; struct mptcp_addr_info addr_r; + struct nlattr *raddr, *laddr; struct mptcp_pm_local local; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; - if (!laddr || !raddr) { - GENL_SET_ERR_MSG(info, "missing required address(es)"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) || + GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) return err; - } msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -382,24 +370,22 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct 
genl_info *info) sk = (struct sock *)msk; + laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(laddr, info, true, &entry); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + if (err < 0) goto create_err; - } if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { - GENL_SET_ERR_MSG(info, "invalid addr flags"); + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "invalid addr flags"); err = -EINVAL; goto create_err; } entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; err = mptcp_pm_parse_addr(raddr, info, &addr_r); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + if (err < 0) goto create_err; - } if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); @@ -409,7 +395,8 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false); if (err < 0) { - GENL_SET_ERR_MSG(info, "did not match address and id"); + NL_SET_ERR_MSG_ATTR(info->extack, laddr, + "did not match address and id"); goto create_err; } @@ -421,6 +408,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) err = __mptcp_subflow_connect(sk, &local, &addr_r); release_sock(sk); + if (err) + GENL_SET_ERR_MSG_FMT(info, "connect error: %d", err); + spin_lock_bh(&msk->pm.lock); if (err) mptcp_userspace_pm_delete_local_addr(msk, &entry); @@ -461,9 +451,7 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: { - const struct ipv6_pinfo *pinfo = inet6_sk(ssk); - - if (!ipv6_addr_equal(&local->addr6, &pinfo->saddr) || + if (!ipv6_addr_equal(&local->addr6, &issk->pinet6->saddr) || !ipv6_addr_equal(&remote->addr6, &ssk->sk_v6_daddr)) continue; break; @@ -483,18 +471,16 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, int 
mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry addr_l; struct mptcp_addr_info addr_r; + struct nlattr *raddr, *laddr; struct mptcp_sock *msk; struct sock *sk, *ssk; int err = -EINVAL; - if (!laddr || !raddr) { - GENL_SET_ERR_MSG(info, "missing required address(es)"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) || + GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) return err; - } msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -502,17 +488,15 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info sk = (struct sock *)msk; + laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(laddr, info, true, &addr_l); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + if (err < 0) goto destroy_err; - } + raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; err = mptcp_pm_parse_addr(raddr, info, &addr_r); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + if (err < 0) goto destroy_err; - } #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (addr_l.addr.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { @@ -530,8 +514,14 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info goto destroy_err; } - if (!addr_l.addr.port || !addr_r.port) { - GENL_SET_ERR_MSG(info, "missing local or remote port"); + if (!addr_l.addr.port) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local port"); + err = -EINVAL; + goto destroy_err; + } + + if (!addr_r.port) { + NL_SET_ERR_MSG_ATTR(info->extack, raddr, "missing remote port"); err = -EINVAL; goto destroy_err; } @@ -539,6 +529,7 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info lock_sock(sk); ssk = mptcp_nl_find_ssk(msk, &addr_l.addr, &addr_r); if (!ssk) { + 
GENL_SET_ERR_MSG(info, "subflow not found"); err = -ESRCH; goto release_sock; } @@ -557,46 +548,51 @@ destroy_err: return err; } -int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) +int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info) { - struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; - struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, }; - struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_addr_info rem = { .family = AF_UNSPEC, }; struct mptcp_pm_addr_entry *entry; + struct nlattr *attr, *attr_rem; struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; u8 bkup = 0; + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) + return ret; + msk = mptcp_userspace_pm_get_sock(info); if (!msk) return ret; sk = (struct sock *)msk; - ret = mptcp_pm_parse_entry(attr, info, false, &loc); - if (ret < 0) + attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + if (local->addr.family == AF_UNSPEC) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "invalid local address family"); + ret = -EINVAL; goto set_flags_err; - - if (attr_rem) { - ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); - if (ret < 0) - goto set_flags_err; } - if (loc.addr.family == AF_UNSPEC || - rem.addr.family == AF_UNSPEC) { - GENL_SET_ERR_MSG(info, "invalid address families"); + attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + ret = mptcp_pm_parse_addr(attr_rem, info, &rem); + if (ret < 0) + goto set_flags_err; + + if (rem.family == AF_UNSPEC) { + NL_SET_ERR_MSG_ATTR(info->extack, attr_rem, + "invalid remote address family"); ret = -EINVAL; goto set_flags_err; } - if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; spin_lock_bh(&msk->pm.lock); - entry = mptcp_userspace_pm_lookup_addr(msk, &loc.addr); + entry = mptcp_userspace_pm_lookup_addr(msk, &local->addr); if 
(entry) { if (bkup) entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; @@ -606,9 +602,13 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&msk->pm.lock); lock_sock(sk); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup); + ret = mptcp_pm_mp_prio_send_ack(msk, &local->addr, &rem, bkup); release_sock(sk); + /* mptcp_pm_mp_prio_send_ack() only fails in one case */ + if (ret < 0) + GENL_SET_ERR_MSG(info, "subflow not found"); + set_flags_err: sock_put(sk); return ret; @@ -625,7 +625,8 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; - void *hdr; + + BUILD_BUG_ON(sizeof(struct id_bitmap) > sizeof(cb->ctx)); bitmap = (struct id_bitmap *)cb->ctx; @@ -641,19 +642,10 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, if (test_bit(entry->addr.id, bitmap->map)) continue; - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, &mptcp_genl_family, - NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); - if (!hdr) + if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) break; - if (mptcp_nl_fill_addr(msg, entry) < 0) { - genlmsg_cancel(msg, hdr); - break; - } - __set_bit(entry->addr.id, bitmap->map); - genlmsg_end(msg, hdr); } spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -663,16 +655,13 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, return ret; } -int mptcp_userspace_pm_get_addr(struct sk_buff *skb, +int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - struct mptcp_pm_addr_entry addr, *entry; + struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; - struct sk_buff *msg; int ret = -EINVAL; struct sock *sk; - void *reply; msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -680,50 +669,26 @@ int mptcp_userspace_pm_get_addr(struct sk_buff *skb, sk = (struct sock *)msk; - ret = mptcp_pm_parse_entry(attr, info, false, &addr); 
- if (ret < 0) - goto out; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - ret = -ENOMEM; - goto out; - } - - reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, - info->genlhdr->cmd); - if (!reply) { - GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); - ret = -EMSGSIZE; - goto fail; - } - lock_sock(sk); spin_lock_bh(&msk->pm.lock); - entry = mptcp_userspace_pm_lookup_addr_by_id(msk, addr.addr.id); - if (!entry) { - GENL_SET_ERR_MSG(info, "address not found"); - ret = -EINVAL; - goto unlock_fail; + entry = mptcp_userspace_pm_lookup_addr_by_id(msk, id); + if (entry) { + *addr = *entry; + ret = 0; } - - ret = mptcp_nl_fill_addr(msg, entry); - if (ret) - goto unlock_fail; - - genlmsg_end(msg, reply); - ret = genlmsg_reply(msg, info); spin_unlock_bh(&msk->pm.lock); release_sock(sk); - sock_put(sk); - return ret; -unlock_fail: - spin_unlock_bh(&msk->pm.lock); - release_sock(sk); -fail: - nlmsg_free(msg); -out: sock_put(sk); return ret; } + +static struct mptcp_pm_ops mptcp_pm_userspace = { + .name = "userspace", + .owner = THIS_MODULE, +}; + +void __init mptcp_pm_userspace_register(void) +{ + mptcp_pm_register(&mptcp_pm_userspace); +} diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6bd819047470..44f7ab463d75 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -118,24 +118,14 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size) -{ - WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc, - mptcp_sk(sk)->rmem_fwd_alloc + size); -} - -static void mptcp_rmem_charge(struct sock *sk, int size) -{ - mptcp_rmem_fwd_alloc_add(sk, -size); -} - static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, struct sk_buff *from) { bool fragstolen; int delta; - if (MPTCP_SKB_CB(from)->offset || + if (unlikely(MPTCP_SKB_CB(to)->cant_coalesce) || + MPTCP_SKB_CB(from)->offset || ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) || 
!skb_try_coalesce(to, from, &fragstolen, &delta)) return false; @@ -150,7 +140,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, * negative one */ atomic_add(delta, &sk->sk_rmem_alloc); - mptcp_rmem_charge(sk, delta); + sk_mem_charge(sk, delta); kfree_skb_partial(from, fragstolen); return true; @@ -165,44 +155,6 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, return mptcp_try_coalesce((struct sock *)msk, to, from); } -static void __mptcp_rmem_reclaim(struct sock *sk, int amount) -{ - amount >>= PAGE_SHIFT; - mptcp_rmem_charge(sk, amount << PAGE_SHIFT); - __sk_mem_reduce_allocated(sk, amount); -} - -static void mptcp_rmem_uncharge(struct sock *sk, int size) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - int reclaimable; - - mptcp_rmem_fwd_alloc_add(sk, size); - reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk); - - /* see sk_mem_uncharge() for the rationale behind the following schema */ - if (unlikely(reclaimable >= PAGE_SIZE)) - __mptcp_rmem_reclaim(sk, reclaimable); -} - -static void mptcp_rfree(struct sk_buff *skb) -{ - unsigned int len = skb->truesize; - struct sock *sk = skb->sk; - - atomic_sub(len, &sk->sk_rmem_alloc); - mptcp_rmem_uncharge(sk, len); -} - -void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb_orphan(skb); - skb->sk = sk; - skb->destructor = mptcp_rfree; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); - mptcp_rmem_charge(sk, skb->truesize); -} - /* "inspired" by tcp_data_queue_ofo(), main differences: * - use mptcp seqs * - don't cope with sacks @@ -315,25 +267,7 @@ merge_right: end: skb_condense(skb); - mptcp_set_owner_r(skb, sk); -} - -static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - int amt, amount; - - if (size <= msk->rmem_fwd_alloc) - return true; - - size -= msk->rmem_fwd_alloc; - amt = sk_mem_pages(size); - amount = amt << PAGE_SHIFT; - if (!__sk_mem_raise_allocated(sk, size, 
amt, SK_MEM_RECV)) - return false; - - mptcp_rmem_fwd_alloc_add(sk, amount); - return true; + skb_set_owner_r(skb, sk); } static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, @@ -351,7 +285,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_orphan(skb); /* try to fetch required memory from subflow */ - if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); goto drop; } @@ -366,6 +300,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len; MPTCP_SKB_CB(skb)->offset = offset; MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; + MPTCP_SKB_CB(skb)->cant_coalesce = 0; if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ @@ -375,7 +310,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, if (tail && mptcp_try_coalesce(sk, tail, skb)) return true; - mptcp_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); return true; } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) { @@ -487,7 +422,7 @@ static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subfl const struct sock *ssk = mptcp_subflow_tcp_sock(subflow); return inet_csk(ssk)->icsk_pending && !subflow->stale_count ? 
- inet_csk(ssk)->icsk_timeout - jiffies : 0; + icsk_timeout(inet_csk(ssk)) - jiffies : 0; } static void mptcp_set_timeout(struct sock *sk) @@ -561,7 +496,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied) bool cleanup, rx_empty; cleanup = (space > 0) && (space >= (old_space << 1)) && copied; - rx_empty = !__mptcp_rmem(sk) && copied; + rx_empty = !sk_rmem_alloc_get(sk) && copied; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -634,27 +569,13 @@ static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) } static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, - struct sock *ssk, - unsigned int *bytes) + struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; bool more_data_avail; struct tcp_sock *tp; - bool done = false; - int sk_rbuf; - - sk_rbuf = READ_ONCE(sk->sk_rcvbuf); - - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); - - if (unlikely(ssk_rbuf > sk_rbuf)) { - WRITE_ONCE(sk->sk_rcvbuf, ssk_rbuf); - sk_rbuf = ssk_rbuf; - } - } + bool ret = false; pr_debug("msk=%p ssk=%p\n", msk, ssk); tp = tcp_sk(ssk); @@ -664,20 +585,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct sk_buff *skb; bool fin; + if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) + break; + /* try to move as much data as available */ map_remaining = subflow->map_data_len - mptcp_subflow_get_map_offset(subflow); skb = skb_peek(&ssk->sk_receive_queue); - if (!skb) { - /* With racing move_skbs_to_msk() and __mptcp_move_skbs(), - * a different CPU can have already processed the pending - * data, stop here or we can enter an infinite loop - */ - if (!moved) - done = true; + if (unlikely(!skb)) break; - } if (__mptcp_check_fallback(msk)) { /* Under fallback skbs have no MPTCP extension and TCP could @@ -690,19 +607,13 @@ static bool 
__mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, offset = seq - TCP_SKB_CB(skb)->seq; fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; - if (fin) { - done = true; + if (fin) seq++; - } if (offset < skb->len) { size_t len = skb->len - offset; - if (tp->urg_data) - done = true; - - if (__mptcp_move_skb(msk, ssk, skb, offset, len)) - moved += len; + ret = __mptcp_move_skb(msk, ssk, skb, offset, len) || ret; seq += len; if (unlikely(map_remaining < len)) { @@ -716,22 +627,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } sk_eat_skb(ssk, skb); - done = true; } WRITE_ONCE(tp->copied_seq, seq); more_data_avail = mptcp_subflow_data_available(ssk); - if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) { - done = true; - break; - } } while (more_data_avail); - if (moved > 0) + if (ret) msk->last_data_recv = tcp_jiffies32; - *bytes += moved; - return done; + return ret; } static bool __mptcp_ofo_queue(struct mptcp_sock *msk) @@ -825,9 +730,9 @@ void __mptcp_error_report(struct sock *sk) static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; + bool moved; - __mptcp_move_skbs_from_subflow(msk, ssk, &moved); + moved = __mptcp_move_skbs_from_subflow(msk, ssk); __mptcp_ofo_queue(msk); if (unlikely(ssk->sk_err)) { if (!sock_owned_by_user(sk)) @@ -843,14 +748,29 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) */ if (mptcp_pending_data_fin(sk, NULL)) mptcp_schedule_work(sk); - return moved > 0; + return moved; +} + +static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk) +{ + if (unlikely(ssk->sk_rcvbuf > sk->sk_rcvbuf)) + WRITE_ONCE(sk->sk_rcvbuf, ssk->sk_rcvbuf); +} + +static void __mptcp_data_ready(struct sock *sk, struct sock *ssk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_rcvbuf_update(sk, ssk); + + /* Wake-up the reader only for in-sequence data */ + if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) + 
sk->sk_data_ready(sk); } void mptcp_data_ready(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); - struct mptcp_sock *msk = mptcp_sk(sk); - int sk_rbuf, ssk_rbuf; /* The peer can send data while we are shutting down this * subflow at msk destruction time, but we must avoid enqueuing @@ -859,19 +779,11 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) if (unlikely(subflow->disposable)) return; - ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); - sk_rbuf = READ_ONCE(sk->sk_rcvbuf); - if (unlikely(ssk_rbuf > sk_rbuf)) - sk_rbuf = ssk_rbuf; - - /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ - if (__mptcp_rmem(sk) > sk_rbuf) - return; - - /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); - if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) - sk->sk_data_ready(sk); + if (!sock_owned_by_user(sk)) + __mptcp_data_ready(sk, ssk); + else + __set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags); mptcp_data_unlock(sk); } @@ -950,20 +862,6 @@ bool mptcp_schedule_work(struct sock *sk) return false; } -static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - - msk_owned_by_me(msk); - - mptcp_for_each_subflow(msk, subflow) { - if (READ_ONCE(subflow->data_avail)) - return mptcp_subflow_tcp_sock(subflow); - } - - return NULL; -} - static bool mptcp_skb_can_collapse_to(u64 write_seq, const struct sk_buff *skb, const struct mptcp_ext *mpext) @@ -1944,16 +1842,17 @@ do_error: static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); -static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, +static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, size_t len, int flags, struct scm_timestamping_internal *tss, int *cmsg_flags) { + struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; int copied = 0; - skb_queue_walk_safe(&msk->receive_queue, skb, tmp) { + 
skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { u32 offset = MPTCP_SKB_CB(skb)->offset; u32 data_len = skb->len - offset; u32 count = min_t(size_t, len - copied, data_len); @@ -1985,10 +1884,11 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, } if (!(flags & MSG_PEEK)) { - /* we will bulk release the skb memory later */ + /* avoid the indirect call, we know the destructor is sock_wfree */ skb->destructor = NULL; - WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); - __skb_unlink(skb, &msk->receive_queue); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_uncharge(sk, skb->truesize); + __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); msk->bytes_consumed += count; } @@ -2101,66 +2001,65 @@ new_measure: msk->rcvq_space.time = mstamp; } -static void __mptcp_update_rmem(struct sock *sk) +static struct mptcp_subflow_context * +__mptcp_first_ready_from(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) { - struct mptcp_sock *msk = mptcp_sk(sk); - - if (!msk->rmem_released) - return; + struct mptcp_subflow_context *start_subflow = subflow; - atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); - mptcp_rmem_uncharge(sk, msk->rmem_released); - WRITE_ONCE(msk->rmem_released, 0); + while (!READ_ONCE(subflow->data_avail)) { + subflow = mptcp_next_subflow(msk, subflow); + if (subflow == start_subflow) + return NULL; + } + return subflow; } -static void __mptcp_splice_receive_queue(struct sock *sk) +static bool __mptcp_move_skbs(struct sock *sk) { + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); + bool ret = false; - skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue); -} + if (list_empty(&msk->conn_list)) + return false; -static bool __mptcp_move_skbs(struct mptcp_sock *msk) -{ - struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; - bool ret, done; + /* verify we can move any data from the subflow, eventually updating */ + if (!(sk->sk_userlocks & 
SOCK_RCVBUF_LOCK)) + mptcp_for_each_subflow(msk, subflow) + __mptcp_rcvbuf_update(sk, subflow->tcp_sock); - do { - struct sock *ssk = mptcp_subflow_recv_lookup(msk); + subflow = list_first_entry(&msk->conn_list, + struct mptcp_subflow_context, node); + for (;;) { + struct sock *ssk; bool slowpath; - /* we can have data pending in the subflows only if the msk - * receive buffer was full at subflow_data_ready() time, - * that is an unlikely slow path. + /* + * As an optimization avoid traversing the subflows list + * and ev. acquiring the subflow socket lock before baling out */ - if (likely(!ssk)) + if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) break; - slowpath = lock_sock_fast(ssk); - mptcp_data_lock(sk); - __mptcp_update_rmem(sk); - done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); - mptcp_data_unlock(sk); + subflow = __mptcp_first_ready_from(msk, subflow); + if (!subflow) + break; + ssk = mptcp_subflow_tcp_sock(subflow); + slowpath = lock_sock_fast(ssk); + ret = __mptcp_move_skbs_from_subflow(msk, ssk) || ret; if (unlikely(ssk->sk_err)) __mptcp_error_report(sk); unlock_sock_fast(ssk, slowpath); - } while (!done); - /* acquire the data lock only if some input data is pending */ - ret = moved > 0; - if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) || - !skb_queue_empty_lockless(&sk->sk_receive_queue)) { - mptcp_data_lock(sk); - __mptcp_update_rmem(sk); - ret |= __mptcp_ofo_queue(msk); - __mptcp_splice_receive_queue(sk); - mptcp_data_unlock(sk); + subflow = mptcp_next_subflow(msk, subflow); } + + __mptcp_ofo_queue(msk); if (ret) mptcp_check_data_fin((struct sock *)msk); - return !skb_queue_empty(&msk->receive_queue); + return ret; } static unsigned int mptcp_inq_hint(const struct sock *sk) @@ -2168,7 +2067,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk) const struct mptcp_sock *msk = mptcp_sk(sk); const struct sk_buff *skb; - skb = skb_peek(&msk->receive_queue); + skb = skb_peek(&sk->sk_receive_queue); if (skb) { u64 hint_val = 
READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq; @@ -2214,7 +2113,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, while (copied < len) { int err, bytes_read; - bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags); + bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@ -2223,7 +2122,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; - if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) + if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk)) continue; /* only the MPTCP socket status is relevant here. The exit @@ -2249,7 +2148,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* race breaker: the shutdown could be after the * previous receive queue check */ - if (__mptcp_move_skbs(msk)) + if (__mptcp_move_skbs(sk)) continue; break; } @@ -2293,9 +2192,8 @@ out_err: } } - pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n", - msk, skb_queue_empty_lockless(&sk->sk_receive_queue), - skb_queue_empty(&msk->receive_queue), copied); + pr_debug("msk=%p rx queue empty=%d copied=%d\n", + msk, skb_queue_empty(&sk->sk_receive_queue), copied); release_sock(sk); return copied; @@ -2783,7 +2681,7 @@ static void mptcp_worker(struct work_struct *work) mptcp_check_fastclose(msk); - mptcp_pm_nl_work(msk); + mptcp_pm_worker(msk); mptcp_check_send_data_fin(sk); mptcp_check_data_fin_ack(sk); @@ -2822,11 +2720,8 @@ static void __mptcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&msk->join_list); INIT_LIST_HEAD(&msk->rtx_queue); INIT_WORK(&msk->work, mptcp_worker); - __skb_queue_head_init(&msk->receive_queue); msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; - WRITE_ONCE(msk->rmem_fwd_alloc, 0); - WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival = TCP_RTO_MIN; msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; 
@@ -3052,8 +2947,6 @@ static void __mptcp_destroy_sock(struct sock *sk) sk->sk_prot->destroy(sk); - WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc)); - WARN_ON_ONCE(msk->rmem_released); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); @@ -3285,12 +3178,9 @@ static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk) rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt) { - newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + + newopt = sock_kmemdup(newsk, inet_opt, sizeof(*inet_opt) + inet_opt->opt.optlen, GFP_ATOMIC); - if (newopt) - memcpy(newopt, inet_opt, sizeof(*inet_opt) + - inet_opt->opt.optlen); - else + if (!newopt) net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); } RCU_INIT_POINTER(newinet->inet_opt, newopt); @@ -3405,21 +3295,14 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags); - /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ - mptcp_data_lock(sk); - skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue); __skb_queue_purge(&sk->sk_receive_queue); skb_rbtree_purge(&msk->out_of_order_queue); - mptcp_data_unlock(sk); /* move all the rx fwd alloc into the sk_mem_reclaim_final in * inet_sock_destruct() will dispose it */ - sk_forward_alloc_add(sk, msk->rmem_fwd_alloc); - WRITE_ONCE(msk->rmem_fwd_alloc, 0); mptcp_token_destroy(msk); - mptcp_pm_free_anno_list(msk); - mptcp_free_local_addr_list(msk); + mptcp_pm_destroy(msk); } static void mptcp_destroy(struct sock *sk) @@ -3453,7 +3336,8 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) #define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \ BIT(MPTCP_RETRANSMIT) | \ - BIT(MPTCP_FLUSH_JOIN_LIST)) + BIT(MPTCP_FLUSH_JOIN_LIST) | \ + BIT(MPTCP_DEQUEUE)) /* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) @@ -3487,6 +3371,11 @@ 
static void mptcp_release_cb(struct sock *sk) __mptcp_push_pending(sk, 0); if (flags & BIT(MPTCP_RETRANSMIT)) __mptcp_retrans(sk); + if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk)) { + /* notify ack seq update */ + mptcp_cleanup_rbuf(msk, 0); + sk->sk_data_ready(sk); + } cond_resched(); spin_lock_bh(&sk->sk_lock.slock); @@ -3506,8 +3395,6 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) __mptcp_sync_sndbuf(sk); } - - __mptcp_update_rmem(sk); } /* MP_JOIN client subflow must wait for 4th ack before sending any data: @@ -3533,7 +3420,6 @@ static void schedule_3rdack_retransmission(struct sock *ssk) WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER); smp_store_release(&icsk->icsk_ack.pending, icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER); - icsk->icsk_ack.timeout = timeout; sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout); } @@ -3678,12 +3564,6 @@ static void mptcp_shutdown(struct sock *sk, int how) __mptcp_wr_shutdown(sk); } -static int mptcp_forward_alloc_get(const struct sock *sk) -{ - return READ_ONCE(sk->sk_forward_alloc) + - READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc); -} - static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) { const struct sock *sk = (void *)msk; @@ -3724,7 +3604,8 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg) return -EINVAL; lock_sock(sk); - __mptcp_move_skbs(msk); + if (__mptcp_move_skbs(sk)) + mptcp_cleanup_rbuf(msk, 0); *karg = mptcp_inq_hint(sk); release_sock(sk); break; @@ -3841,7 +3722,6 @@ static struct proto mptcp_prot = { .hash = mptcp_hash, .unhash = mptcp_unhash, .get_port = mptcp_get_port, - .forward_alloc_get = mptcp_forward_alloc_get, .stream_memory_free = mptcp_stream_memory_free, .sockets_allocated = &mptcp_sockets_allocated, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index ad21925af061..d409586b5977 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -124,12 +124,14 @@ #define 
MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 +#define MPTCP_DEQUEUE 8 struct mptcp_skb_cb { u64 map_seq; u64 end_seq; u32 offset; - u8 has_rxtstamp:1; + u8 has_rxtstamp; + u8 cant_coalesce; }; #define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) @@ -221,6 +223,8 @@ struct mptcp_pm_data { spinlock_t lock; /*protects the whole PM data */ + struct_group(reset, + u8 addr_signal; bool server_side; bool work_pending; @@ -233,6 +237,9 @@ struct mptcp_pm_data { u8 pm_type; u8 subflows; u8 status; + + ); + DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_rm_list rm_list_tx; struct mptcp_rm_list rm_list_rx; @@ -279,7 +286,6 @@ struct mptcp_sock { u64 rcv_data_fin_seq; u64 bytes_retrans; u64 bytes_consumed; - int rmem_fwd_alloc; int snd_burst; int old_wspace; u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; @@ -294,7 +300,6 @@ struct mptcp_sock { u32 last_ack_recv; unsigned long timer_ival; u32 token; - int rmem_released; unsigned long flags; unsigned long cb_flags; bool recovery; /* closing subflow write queue reinjected */ @@ -324,7 +329,6 @@ struct mptcp_sock { struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; - struct sk_buff_head receive_queue; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; @@ -355,6 +359,8 @@ struct mptcp_sock { list_for_each_entry(__subflow, &((__msk)->conn_list), node) #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) +#define mptcp_next_subflow(__msk, __subflow) \ + list_next_entry_circular(__subflow, &((__msk)->conn_list), node) extern struct genl_family mptcp_genl_family; @@ -381,14 +387,6 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk) #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) #endif -/* the msk socket don't use the backlog, also 
account for the bulk - * free memory - */ -static inline int __mptcp_rmem(const struct sock *sk) -{ - return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released); -} - static inline int mptcp_win_from_space(const struct sock *sk, int space) { return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space); @@ -401,7 +399,8 @@ static inline int mptcp_space_from_win(const struct sock *sk, int win) static inline int __mptcp_space(const struct sock *sk) { - return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk)); + return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - + sk_rmem_alloc_get(sk)); } static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) @@ -700,6 +699,7 @@ int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); +const char *mptcp_get_path_manager(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); void mptcp_active_disable(struct sock *sk); @@ -726,12 +726,14 @@ struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void __mptcp_unaccepted_force_close(struct sock *sk); -void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); void mptcp_set_state(struct sock *sk, int state); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); -void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); +void mptcp_local_address(const struct sock_common *skc, + struct mptcp_addr_info *addr); +void mptcp_remote_address(const struct sock_common *skc, + struct mptcp_addr_info *addr); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, @@ -990,6 +992,7 @@ __sum16 __mptcp_make_csum(u64 
data_seq, u32 subflow_seq, u16 data_len, __wsum su void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_reset(struct mptcp_sock *msk); +void mptcp_pm_destroy(struct mptcp_sock *msk); int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, struct mptcp_addr_info *addr); int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, @@ -999,7 +1002,6 @@ bool mptcp_pm_addr_families_match(const struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *rem); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); -void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); @@ -1013,34 +1015,35 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); -bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *remote); -void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); +void mptcp_pm_send_ack(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + bool prio, bool backup); +void mptcp_pm_addr_send_ack(struct mptcp_sock *msk); +void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id); +void mptcp_pm_rm_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - struct mptcp_addr_info *rem, - u8 
bkup); +int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + struct mptcp_addr_info *rem, + u8 bkup); bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool check_id); -struct mptcp_pm_add_entry * -mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, - const struct mptcp_addr_info *addr); bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, const struct mptcp_addr_info *saddr); bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info); -int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info); -int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info); +int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); @@ -1048,7 +1051,16 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry); -void mptcp_free_local_addr_list(struct mptcp_sock *msk); +/* the default path manager, used in mptcp_pm_unregister */ +extern struct mptcp_pm_ops mptcp_pm_kernel; + +struct mptcp_pm_ops *mptcp_pm_find(const char *name); +int mptcp_pm_register(struct mptcp_pm_ops *pm_ops); +void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops); +int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops); +void mptcp_pm_get_available(char *buf, size_t maxlen); + +void 
mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); @@ -1058,12 +1070,11 @@ void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); -void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); -int mptcp_nl_fill_addr(struct sk_buff *skb, - struct mptcp_pm_addr_entry *entry); +int mptcp_pm_genl_fill_addr(struct sk_buff *msg, + struct netlink_callback *cb, + struct mptcp_pm_addr_entry *entry); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { @@ -1126,19 +1137,20 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); -int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc); +int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc); bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); -int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct 
netlink_callback *cb); -int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info); -int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info); -int mptcp_userspace_pm_get_addr(struct sk_buff *skb, +int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info); +int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) @@ -1150,8 +1162,11 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo return local_id; } +void __init mptcp_pm_kernel_register(void); +void __init mptcp_pm_userspace_register(void); void __init mptcp_pm_nl_init(void); -void mptcp_pm_nl_work(struct mptcp_sock *msk); +void mptcp_pm_worker(struct mptcp_sock *msk); +void __mptcp_pm_kernel_worker(struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index df7dbcfa3b71..c16c6fbd4ba2 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -16,13 +16,25 @@ static DEFINE_SPINLOCK(mptcp_sched_list_lock); static LIST_HEAD(mptcp_sched_list); -static int mptcp_sched_default_get_subflow(struct mptcp_sock *msk, +static int mptcp_sched_default_get_send(struct mptcp_sock *msk, + struct mptcp_sched_data *data) +{ + struct sock *ssk; + + ssk = mptcp_subflow_get_send(msk); + if (!ssk) + return -EINVAL; + + mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true); + return 0; +} + +static int mptcp_sched_default_get_retrans(struct mptcp_sock *msk, struct mptcp_sched_data *data) { struct sock *ssk; - ssk = data->reinject ? 
mptcp_subflow_get_retrans(msk) : - mptcp_subflow_get_send(msk); + ssk = mptcp_subflow_get_retrans(msk); if (!ssk) return -EINVAL; @@ -31,7 +43,8 @@ static int mptcp_sched_default_get_subflow(struct mptcp_sock *msk, } static struct mptcp_sched_ops mptcp_sched_default = { - .get_subflow = mptcp_sched_default_get_subflow, + .get_send = mptcp_sched_default_get_send, + .get_retrans = mptcp_sched_default_get_retrans, .name = "default", .owner = THIS_MODULE, }; @@ -73,7 +86,7 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen) int mptcp_register_scheduler(struct mptcp_sched_ops *sched) { - if (!sched->get_subflow) + if (!sched->get_send) return -EINVAL; spin_lock(&mptcp_sched_list_lock); @@ -144,7 +157,7 @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, int mptcp_sched_get_send(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - struct mptcp_sched_data data; + struct mptcp_sched_data *data = NULL; msk_owned_by_me(msk); @@ -164,16 +177,15 @@ int mptcp_sched_get_send(struct mptcp_sock *msk) return 0; } - data.reinject = false; if (msk->sched == &mptcp_sched_default || !msk->sched) - return mptcp_sched_default_get_subflow(msk, &data); - return msk->sched->get_subflow(msk, &data); + return mptcp_sched_default_get_send(msk, data); + return msk->sched->get_send(msk, data); } int mptcp_sched_get_retrans(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - struct mptcp_sched_data data; + struct mptcp_sched_data *data = NULL; msk_owned_by_me(msk); @@ -186,8 +198,9 @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) return 0; } - data.reinject = true; if (msk->sched == &mptcp_sched_default || !msk->sched) - return mptcp_sched_default_get_subflow(msk, &data); - return msk->sched->get_subflow(msk, &data); + return mptcp_sched_default_get_retrans(msk, data); + if (msk->sched->get_retrans) + return msk->sched->get_retrans(msk, data); + return msk->sched->get_send(msk, data); } diff --git a/net/mptcp/sockopt.c 
b/net/mptcp/sockopt.c index 505445a9598f..3caa0a9d3b38 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1419,6 +1419,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_TOS: return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); + case IP_FREEBIND: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(FREEBIND, sk)); + case IP_TRANSPARENT: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(TRANSPARENT, sk)); case IP_BIND_ADDRESS_NO_PORT: return mptcp_put_int_option(msk, optval, optlen, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); @@ -1430,6 +1436,26 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, return -EOPNOTSUPP; } +static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = (void *)msk; + + switch (optname) { + case IPV6_V6ONLY: + return mptcp_put_int_option(msk, optval, optlen, + sk->sk_ipv6only); + case IPV6_TRANSPARENT: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(TRANSPARENT, sk)); + case IPV6_FREEBIND: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(FREEBIND, sk)); + } + + return -EOPNOTSUPP; +} + static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { @@ -1469,6 +1495,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname, if (level == SOL_IP) return mptcp_getsockopt_v4(msk, optname, optval, option); + if (level == SOL_IPV6) + return mptcp_getsockopt_v6(msk, optname, optval, option); if (level == SOL_TCP) return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); if (level == SOL_MPTCP) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9f18217dddc8..efe8d86496db 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -802,9 +802,6 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk, 
subflow_set_remote_key(msk, subflow, mp_opt); WRITE_ONCE(subflow->fully_established, true); WRITE_ONCE(msk->fully_established, true); - - if (subflow->is_mptfo) - __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); } static struct sock *subflow_syn_recv_sock(const struct sock *sk, @@ -1270,7 +1267,12 @@ out: subflow->map_valid = 0; } -/* sched mptcp worker to remove the subflow if no more data is pending */ +static bool subflow_is_done(const struct sock *sk) +{ + return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE; +} + +/* sched mptcp worker for subflow cleanup if no more data is pending */ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; @@ -1280,8 +1282,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss inet_sk_state_load(sk) != TCP_ESTABLISHED))) return; - if (skb_queue_empty(&ssk->sk_receive_queue) && - !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + if (!skb_queue_empty(&ssk->sk_receive_queue)) + return; + + if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + mptcp_schedule_work(sk); + + /* when the fallback subflow closes the rx side, trigger a 'dummy' + * ingress data fin, so that the msk state will follow along + */ + if (__mptcp_check_fallback(msk) && subflow_is_done(ssk) && + msk->first == ssk && + mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) mptcp_schedule_work(sk); } @@ -1826,11 +1838,6 @@ static void __subflow_state_change(struct sock *sk) rcu_read_unlock(); } -static bool subflow_is_done(const struct sock *sk) -{ - return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE; -} - static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -1857,13 +1864,6 @@ static void subflow_state_change(struct sock *sk) subflow_error_report(sk); subflow_sched_work_if_closed(mptcp_sk(parent), sk); - - /* when the fallback subflow 
closes the rx side, trigger a 'dummy' - * ingress data fin, so that the msk state will follow along - */ - if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk && - mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) - mptcp_schedule_work(parent); } void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 502cf10aab41..2f666751c7e7 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -618,7 +618,9 @@ static struct ctl_table nf_ct_sysctl_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, [NF_SYSCTL_CT_COUNT] = { .procname = "nf_conntrack_count", @@ -654,7 +656,9 @@ static struct ctl_table nf_ct_sysctl_table[] = { .data = &nf_ct_expect_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, [NF_SYSCTL_CT_ACCT] = { .procname = "nf_conntrack_acct", @@ -947,7 +951,9 @@ static struct ctl_table nf_ct_netfilter_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, }; diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 58402226045e..86d5fc5d28e3 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -216,7 +216,9 @@ nf_log_dump_tcp_header(struct nf_log_buf *m, /* Max length: 9 "RES=0x3C " */ nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ + /* Max length: 35 "AE CWR ECE URG ACK PSH RST SYN FIN " 
*/ + if (th->ae) + nf_log_buf_add(m, "AE "); if (th->cwr) nf_log_buf_add(m, "CWR "); if (th->ece) @@ -516,7 +518,7 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m, /* Proto Max log string length */ /* IP: 40+46+6+11+127 = 230 */ - /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ + /* TCP: 10+max(25,20+30+13+9+35+11+127) = 255 */ /* UDP: 10+max(25,20) = 35 */ /* UDPLITE: 14+max(25,20) = 39 */ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ @@ -526,7 +528,7 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m, /* (ICMP allows recursion one level deep) */ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ - /* maxlen = 230+ 91 + 230 + 252 = 803 */ + /* maxlen = 230+ 91 + 230 + 255 = 806 */ } static noinline_for_stack void diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 75598520b0fa..6557a4018c09 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -21,25 +21,22 @@ #include <net/netfilter/nf_log.h> #include <net/netfilter/nft_meta.h> -#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_X86) - +#ifdef CONFIG_MITIGATION_RETPOLINE static struct static_key_false nf_tables_skip_direct_calls; -static bool nf_skip_indirect_calls(void) +static inline bool nf_skip_indirect_calls(void) { return static_branch_likely(&nf_tables_skip_direct_calls); } -static void __init nf_skip_indirect_calls_enable(void) +static inline void __init nf_skip_indirect_calls_enable(void) { if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) static_branch_enable(&nf_tables_skip_direct_calls); } #else -static inline bool nf_skip_indirect_calls(void) { return false; } - static inline void nf_skip_indirect_calls_enable(void) { } -#endif +#endif /* CONFIG_MITIGATION_RETPOLINE */ static noinline void __nft_trace_packet(const struct nft_pktinfo *pkt, const struct nft_verdict *verdict, diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5c913987901a..8b7b39d8a109 100644 
--- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -567,7 +567,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, enum ip_conntrack_info ctinfo = 0; const struct nfnl_ct_hook *nfnl_ct; bool csum_verify; - struct lsm_context ctx; + struct lsm_context ctx = { NULL, 0, 0 }; int seclen = 0; ktime_t tstamp; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index fa02aab56724..3b507694e81e 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -15,7 +15,6 @@ #include <linux/random.h> #include <linux/jhash.h> #include <linux/slab.h> -#include <linux/vmalloc.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/list.h> @@ -294,8 +293,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, if (size < 16) size = 16; } - /* FIXME: don't use vmalloc() here or anywhere else -HW */ - hinfo = vmalloc(struct_size(hinfo, hash, size)); + hinfo = kvmalloc(struct_size(hinfo, hash, size), GFP_KERNEL); if (hinfo == NULL) return -ENOMEM; *out_hinfo = hinfo; @@ -303,7 +301,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, /* copy match config into hashtable config */ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3); if (ret) { - vfree(hinfo); + kvfree(hinfo); return ret; } @@ -322,7 +320,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, hinfo->rnd_initialized = false; hinfo->name = kstrdup(name, GFP_KERNEL); if (!hinfo->name) { - vfree(hinfo); + kvfree(hinfo); return -ENOMEM; } spin_lock_init(&hinfo->lock); @@ -344,7 +342,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, ops, hinfo); if (hinfo->pde == NULL) { kfree(hinfo->name); - vfree(hinfo); + kvfree(hinfo); return -ENOMEM; } hinfo->net = net; @@ -433,7 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) cancel_delayed_work_sync(&hinfo->gc_work); htable_selective_cleanup(hinfo, true); kfree(hinfo->name); - 
vfree(hinfo); + kvfree(hinfo); } } diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h index 5d1fb7018dba..600060ca940a 100644 --- a/net/netfilter/xt_repldata.h +++ b/net/netfilter/xt_repldata.h @@ -29,7 +29,7 @@ if (tbl == NULL) \ return NULL; \ term = (struct type##_error *)&(((char *)tbl)[term_offset]); \ - strscpy_pad(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \ + strscpy(tbl->repl.name, info->name); \ *term = (struct type##_error)typ2##_ERROR_INIT; \ tbl->repl.valid_hooks = hook_mask; \ tbl->repl.num_entries = nhooks + 1; \ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a53ea60d0a78..e8972a857e51 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -771,6 +771,7 @@ static int netlink_release(struct socket *sock) nlk->cb.done(&nlk->cb); module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); + WRITE_ONCE(nlk->cb_running, false); } module_put(nlk->module); diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index ba91284f4086..e6cf4eb06b46 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -78,17 +78,6 @@ static struct nfc_llc_engine *nfc_llc_name_to_engine(const char *name) return NULL; } -void nfc_llc_unregister(const char *name) -{ - struct nfc_llc_engine *llc_engine; - - llc_engine = nfc_llc_name_to_engine(name); - if (llc_engine == NULL) - return; - - nfc_llc_del_engine(llc_engine); -} - struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, rcv_to_hci_t rcv_to_hci, int tx_headroom, diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index d66271d211a5..09914608ec43 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -40,7 +40,6 @@ struct nfc_llc { void *nfc_llc_get_data(struct nfc_llc *llc); int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops); -void nfc_llc_unregister(const char *name); int nfc_llc_nop_register(void); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 
9ca6231ea647..384ca77f4e79 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -29,8 +29,8 @@ * datapath. * @n_hit: Number of received packets for which a matching flow was found in * the flow table. - * @n_miss: Number of received packets that had no matching flow in the flow - * table. The sum of @n_hit and @n_miss is the number of packets that have + * @n_missed: Number of received packets that had no matching flow in the flow + * table. The sum of @n_hit and @n_missed is the number of packets that have * been received by the datapath. * @n_lost: Number of received packets that had no matching flow in the flow * table that could not be sent to userspace (normally due to an overflow in @@ -40,6 +40,7 @@ * up per packet. * @n_cache_hit: The number of received packets that had their mask found using * the mask cache. + * @syncp: Synchronization point for 64bit counters. */ struct dp_stats_percpu { u64 n_hit; @@ -74,8 +75,10 @@ struct dp_nlsk_pids { * ovs_mutex and RCU. * @stats_percpu: Per-CPU datapath statistics. * @net: Reference to net namespace. - * @max_headroom: the maximum headroom of all vports in this datapath; it will + * @user_features: Bitmap of enabled %OVS_DP_F_* features. + * @max_headroom: The maximum headroom of all vports in this datapath; it will * be used by all the internal vports in this dp. + * @meter_tbl: Meter table. * @upcall_portids: RCU protected 'struct dp_nlsk_pids'. * * Context: See the comment on locking at the top of datapath.c for additional @@ -128,10 +131,13 @@ struct ovs_skb_cb { #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) /** - * struct dp_upcall - metadata to include with a packet to send to userspace + * struct dp_upcall_info - metadata to include with a packet sent to userspace * @cmd: One of %OVS_PACKET_CMD_*. * @userdata: If nonnull, its variable-length value is passed to userspace as * %OVS_PACKET_ATTR_USERDATA. 
+ * @actions: If nonnull, its variable-length value is passed to userspace as + * %OVS_PACKET_ATTR_ACTIONS. + * @actions_len: The length of the @actions. * @portid: Netlink portid to which packet should be sent. If @portid is 0 * then no packet is sent and the packet is accounted in the datapath's @n_lost * counter. @@ -152,6 +158,10 @@ struct dp_upcall_info { * struct ovs_net - Per net-namespace data for ovs. * @dps: List of datapaths to enable dumping them all out. * Protected by genl_mutex. + * @dp_notify_work: A work notifier to handle port unregistering. + * @masks_rebalance: A work to periodically optimize flow table caches. + * @ct_limit_info: A hash table of conntrack zone connection limits. + * @xt_label: Whether connlables are configured for the network or not. */ struct ovs_net { struct list_head dps; @@ -160,8 +170,6 @@ struct ovs_net { #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_ct_limit_info *ct_limit_info; #endif - - /* Module reference for configuring conntrack. */ bool xt_label; }; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 2412d7813d24..125d310871e9 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -149,7 +149,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) /* Restrict bridge port to current netns. */ if (vport->port_no == OVSP_LOCAL) - vport->dev->netns_local = true; + vport->dev->netns_immutable = true; rtnl_lock(); err = register_netdevice(vport->dev); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 3e71ca8ad8a7..9f67b9dd49f9 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -97,6 +97,8 @@ struct vport { * @desired_ifindex: New vport's ifindex. * @dp: New vport's datapath. * @port_no: New vport's port number. + * @upcall_portids: %OVS_VPORT_ATTR_UPCALL_PID attribute from Netlink message, + * %NULL if none was supplied. 
*/ struct vport_parms { const char *name; @@ -125,6 +127,8 @@ struct vport_parms { * have any configuration. * @send: Send a packet on the device. * zero for dropped packets or negative for error. + * @owner: Module that implements this vport type. + * @list: List entry in the global list of vport types. */ struct vport_ops { enum ovs_vport_type type; @@ -144,6 +148,7 @@ struct vport_ops { /** * struct vport_upcall_stats_percpu - per-cpu packet upcall statistics for * a given vport. + * @syncp: Synchronization point for 64bit counters. * @n_success: Number of packets that upcall to userspace succeed. * @n_fail: Number of packets that upcall to userspace failed. */ @@ -164,6 +169,8 @@ void ovs_vport_free(struct vport *); * * @vport: vport to access * + * Returns: A void pointer to a private data allocated in the @vport. + * * If a nonzero size was passed in priv_size of vport_alloc() a private data * area was allocated on creation. This allows that area to be accessed and * used for any purpose needed by the vport implementer. @@ -178,6 +185,8 @@ static inline void *vport_priv(const struct vport *vport) * * @priv: Start of private data area. * + * Returns: A reference to a vport structure that contains @priv. + * * It is sometimes useful to translate from a pointer to the private data * area to the vport, such as in the case where the private data pointer is * the result of a hash table lookup. 
@priv must point to the start of the diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c131e5ceea37..3e9ddf72cd03 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2102,8 +2102,8 @@ retry: skb->protocol = proto; skb->dev = dev; - skb->priority = READ_ONCE(sk->sk_priority); - skb->mark = READ_ONCE(sk->sk_mark); + skb->priority = sockc.priority; + skb->mark = sockc.mark; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); skb_setup_tx_timestamp(skb, &sockc); @@ -2634,8 +2634,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->protocol = proto; skb->dev = dev; - skb->priority = READ_ONCE(po->sk.sk_priority); - skb->mark = READ_ONCE(po->sk.sk_mark); + skb->priority = sockc->priority; + skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); skb_setup_tx_timestamp(skb, sockc); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3039,7 +3039,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_unlock; sockcm_init(&sockc, sk); - sockc.mark = READ_ONCE(sk->sk_mark); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) diff --git a/net/rds/stats.c b/net/rds/stats.c index 9e87da43c004..cb2e3d2cdf73 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -89,8 +89,7 @@ void rds_stats_info_copy(struct rds_info_iterator *iter, for (i = 0; i < nr; i++) { BUG_ON(strlen(names[i]) >= sizeof(ctr.name)); - strncpy(ctr.name, names[i], sizeof(ctr.name) - 1); - ctr.name[sizeof(ctr.name) - 1] = '\0'; + strscpy_pad(ctr.name, names[i]); ctr.value = values[i]; rds_info_copy(iter, &ctr, sizeof(ctr)); diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 9fa019e0dcad..41e657e97761 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -162,6 +162,9 @@ static int rfkill_gpio_probe(struct platform_device *pdev) if (!rfkill->rfkill_dev) return -ENOMEM; + if 
(device_property_present(&pdev->dev, "default-blocked")) + rfkill_init_sw_state(rfkill->rfkill_dev, true); + ret = rfkill_register(rfkill->rfkill_dev); if (ret < 0) goto err_destroy; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a64a0cab1bf7..3cc3af15086f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -344,6 +344,7 @@ struct rxrpc_peer { struct hlist_head error_targets; /* targets for net error distribution */ struct rb_root service_conns; /* Service connections */ struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ + unsigned long app_data; /* Application data (e.g. afs_server) */ time64_t last_tx_at; /* Last time packet sent here */ seqlock_t service_conn_lock; spinlock_t lock; /* access lock */ diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 56e09d161a97..71b6e07bf161 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -461,7 +461,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) continue; hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) { - pr_err("Leaked peer %u {%u} %pISp\n", + pr_err("Leaked peer %x {%u} %pISp\n", peer->debug_id, refcount_read(&peer->ref), &peer->srx.transport); @@ -478,7 +478,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) */ struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call) { - return call->peer; + return rxrpc_get_peer(call->peer, rxrpc_peer_get_application); } EXPORT_SYMBOL(rxrpc_kernel_get_call_peer); @@ -520,3 +520,29 @@ const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer) (peer ? &peer->srx.transport : &rxrpc_null_addr.transport); } EXPORT_SYMBOL(rxrpc_kernel_remote_addr); + +/** + * rxrpc_kernel_set_peer_data - Set app-specific data on a peer. + * @peer: The peer to alter + * @app_data: The data to set + * + * Set the app-specific data on a peer. AF_RXRPC makes no effort to retain + * anything the data might refer to. 
The previous app_data is returned. + */ +unsigned long rxrpc_kernel_set_peer_data(struct rxrpc_peer *peer, unsigned long app_data) +{ + return xchg(&peer->app_data, app_data); +} +EXPORT_SYMBOL(rxrpc_kernel_set_peer_data); + +/** + * rxrpc_kernel_get_peer_data - Get app-specific data from a peer. + * @peer: The peer to query + * + * Retrieve the app-specific data from a peer. + */ +unsigned long rxrpc_kernel_get_peer_data(const struct rxrpc_peer *peer) +{ + return peer->app_data; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer_data); diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 91c0ec729823..c1f75f272757 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -287,8 +287,7 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, gact->param.tcfg_basetime = basetime; gact->param.tcfg_clockid = clockid; gact->tk_offset = tko; - hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT); - gact->hitimer.function = gate_timer_func; + hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT); } static int tcf_gate_init(struct net *net, struct nlattr *nla, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index af7c99845948..ae5dea7c48a8 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -571,8 +571,8 @@ static void tunnel_key_release(struct tc_action *a) static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { + const u8 *src = ip_tunnel_info_opts(info); int len = info->options_len; - u8 *src = (u8 *)(info + 1); struct nlattr *start; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); @@ -580,7 +580,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, return -EMSGSIZE; while (len > 0) { - struct geneve_opt *opt = (struct geneve_opt *)src; + const struct geneve_opt *opt = (const struct geneve_opt *)src; if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS, opt->opt_class) || @@ -603,7 +603,7 @@ static 
int tunnel_key_geneve_opts_dump(struct sk_buff *skb, static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { - struct vxlan_metadata *md = (struct vxlan_metadata *)(info + 1); + const struct vxlan_metadata *md = ip_tunnel_info_opts(info); struct nlattr *start; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_VXLAN); @@ -622,7 +622,7 @@ static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb, static int tunnel_key_erspan_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { - struct erspan_metadata *md = (struct erspan_metadata *)(info + 1); + const struct erspan_metadata *md = ip_tunnel_info_opts(info); struct nlattr *start; start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 8996c73c9779..3f2e707a11d1 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -460,7 +460,7 @@ META_COLLECTOR(int_sk_fwd_alloc) *err = -1; return; } - dst->value = sk_forward_alloc_get(sk); + dst->value = READ_ONCE(sk->sk_forward_alloc); } META_COLLECTOR(int_sk_sndbuf) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6c625dcd0651..defb05c1fba4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/hashtable.h> +#include <net/netdev_lock.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> @@ -619,8 +620,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, clockid_t clockid) { - hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED); - wd->timer.function = qdisc_watchdog; + hrtimer_setup(&wd->timer, qdisc_watchdog, clockid, HRTIMER_MODE_ABS_PINNED); wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init_clockid); @@ -1279,9 +1279,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev, * We replay the request because the device may * 
go away in the mean time. */ + netdev_unlock_ops(dev); rtnl_unlock(); request_module(NET_SCH_ALIAS_PREFIX "%s", name); rtnl_lock(); + netdev_lock_ops(dev); ops = qdisc_lookup_ops(kind); if (ops != NULL) { /* We will try again qdisc_lookup_ops, @@ -1505,27 +1507,18 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { * Delete/get qdisc. */ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, - struct netlink_ext_ack *extack) +static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack, + struct net_device *dev, + struct nlattr *tca[TCA_MAX + 1], + struct tcmsg *tcm) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = nlmsg_data(n); - struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; - u32 clid; struct Qdisc *q = NULL; struct Qdisc *p = NULL; + u32 clid; int err; - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, - rtm_tca_policy, extack); - if (err < 0) - return err; - - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - clid = tcm->tcm_parent; if (clid) { if (clid != TC_H_ROOT) { @@ -1582,6 +1575,31 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, return 0; } +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct tcmsg *tcm = nlmsg_data(n); + struct nlattr *tca[TCA_MAX + 1]; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); + if (err < 0) + return err; + + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + netdev_lock_ops(dev); + err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm); + netdev_unlock_ops(dev); + + return err; +} + static bool req_create_or_replace(struct nlmsghdr *n) { return (n->nlmsg_flags & NLM_F_CREATE && @@ -1601,35 +1619,19 @@ static bool req_change(struct nlmsghdr *n) !(n->nlmsg_flags & NLM_F_EXCL)); } -/* - 
* Create/change qdisc. - */ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, - struct netlink_ext_ack *extack) +static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack, + struct net_device *dev, + struct nlattr *tca[TCA_MAX + 1], + struct tcmsg *tcm, + bool *replay) { - struct net *net = sock_net(skb->sk); - struct tcmsg *tcm; - struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; + struct Qdisc *q = NULL; + struct Qdisc *p = NULL; u32 clid; - struct Qdisc *q, *p; int err; -replay: - /* Reinit, just in case something touches this. */ - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, - rtm_tca_policy, extack); - if (err < 0) - return err; - - tcm = nlmsg_data(n); clid = tcm->tcm_parent; - q = p = NULL; - - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - if (clid) { if (clid != TC_H_ROOT) { @@ -1755,7 +1757,7 @@ replay: } err = qdisc_change(q, tca, extack); if (err == 0) - qdisc_notify(net, skb, n, clid, NULL, q, extack); + qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack); return err; create_n_graft: @@ -1788,8 +1790,10 @@ create_n_graft2: tca, &err, extack); } if (q == NULL) { - if (err == -EAGAIN) - goto replay; + if (err == -EAGAIN) { + *replay = true; + return 0; + } return err; } @@ -1804,6 +1808,41 @@ graft: return 0; } +/* + * Create/change qdisc. + */ +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tca[TCA_MAX + 1]; + struct net_device *dev; + struct tcmsg *tcm; + bool replay; + int err; + +replay: + /* Reinit, just in case something touches this. 
*/ + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); + if (err < 0) + return err; + + tcm = nlmsg_data(n); + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + replay = false; + netdev_lock_ops(dev); + err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm, &replay); + netdev_unlock_ops(dev); + if (replay) + goto replay; + + return err; +} + static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx, bool recur, @@ -1888,17 +1927,23 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; + netdev_lock_ops(dev); if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc), skb, cb, &q_idx, s_q_idx, - true, tca[TCA_DUMP_INVISIBLE]) < 0) + true, tca[TCA_DUMP_INVISIBLE]) < 0) { + netdev_unlock_ops(dev); goto done; + } dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping), skb, cb, &q_idx, s_q_idx, false, - tca[TCA_DUMP_INVISIBLE]) < 0) + tca[TCA_DUMP_INVISIBLE]) < 0) { + netdev_unlock_ops(dev); goto done; + } + netdev_unlock_ops(dev); cont: idx++; @@ -2135,15 +2180,15 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, #endif -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, - struct netlink_ext_ack *extack) +static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack, + struct net_device *dev, + struct nlattr *tca[TCA_MAX + 1], + struct tcmsg *tcm) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = nlmsg_data(n); - struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; - struct Qdisc *q = NULL; const struct Qdisc_class_ops *cops; + struct Qdisc *q = NULL; unsigned long cl = 0; unsigned long new_cl; u32 portid; @@ -2151,15 +2196,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, u32 qid; int err; - err = nlmsg_parse_deprecated(n, 
sizeof(*tcm), tca, TCA_MAX, - rtm_tca_policy, extack); - if (err < 0) - return err; - - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - /* parent == TC_H_UNSPEC - unspecified parent. parent == TC_H_ROOT - class is root, which has no parent. @@ -2274,6 +2310,31 @@ out: return err; } +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct tcmsg *tcm = nlmsg_data(n); + struct nlattr *tca[TCA_MAX + 1]; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); + if (err < 0) + return err; + + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + netdev_lock_ops(dev); + err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm); + netdev_unlock_ops(dev); + + return err; +} + struct qdisc_dump_args { struct qdisc_walker w; struct sk_buff *skb; @@ -2350,20 +2411,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, return 0; } -static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb, + struct tcmsg *tcm, struct net_device *dev) { - struct tcmsg *tcm = nlmsg_data(cb->nlh); - struct net *net = sock_net(skb->sk); struct netdev_queue *dev_queue; - struct net_device *dev; int t, s_t; - if (nlmsg_len(cb->nlh) < sizeof(*tcm)) - return 0; - dev = dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return 0; - s_t = cb->args[0]; t = 0; @@ -2380,10 +2433,32 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) done: cb->args[0] = t; - dev_put(dev); return skb->len; } +static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int err; + + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) + return 0; + 
+ dev = dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return 0; + + netdev_lock_ops(dev); + err = __tc_dump_tclass(skb, cb, tcm, dev); + netdev_unlock_ops(dev); + + dev_put(dev); + + return err; +} + #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6a07cdbdb9e1..2cfbc977fe6d 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -447,7 +447,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (q->wsum + delta_w > QFQ_MAX_WSUM) { NL_SET_ERR_MSG_FMT_MOD(extack, - "total weight out of range (%d + %u)\n", + "total weight out of range (%d + %u)", delta_w, q->wsum); return -EINVAL; } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index a68e17891b0b..14021b812329 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1932,8 +1932,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (!TXTIME_ASSIST_IS_ENABLED(q->flags) && !FULL_OFFLOAD_IS_ENABLED(q->flags) && !hrtimer_active(&q->advance_timer)) { - hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); - q->advance_timer.function = advance_sched; + hrtimer_setup(&q->advance_timer, advance_sched, q->clockid, HRTIMER_MODE_ABS); } err = taprio_get_start_time(sch, new_admin, &start); @@ -2056,8 +2055,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, spin_lock_init(&q->current_entry_lock); - hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); - q->advance_timer.function = advance_sched; + hrtimer_setup(&q->advance_timer, advance_sched, CLOCK_TAI, HRTIMER_MODE_ABS); q->root = sch; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 29727ed1008e..5407a3922101 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -185,12 +185,9 @@ static void sctp_v4_copy_ip_options(struct sock *sk, struct sock *newsk) rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt) { - newopt = 
sock_kmalloc(newsk, sizeof(*inet_opt) + + newopt = sock_kmemdup(newsk, inet_opt, sizeof(*inet_opt) + inet_opt->opt.optlen, GFP_ATOMIC); - if (newopt) - memcpy(newopt, inet_opt, sizeof(*inet_opt) + - inet_opt->opt.optlen); - else + if (!newopt) pr_err("%s: Failed to copy ip options\n", __func__); } RCU_INIT_POINTER(newinet->inet_opt, newopt); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 716808f374a8..b391c2ef463f 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -1079,14 +1079,16 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct net_device *base_ndev; struct net *net; - ndev = pnet_find_base_ndev(ndev); + base_ndev = pnet_find_base_ndev(ndev); net = dev_net(ndev); - if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, + if (smc_pnetid_by_dev_port(base_ndev->dev.parent, base_ndev->dev_port, ndev_pnetid) && + smc_pnet_find_ndev_pnetid_by_table(base_ndev, ndev_pnetid) && smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { - smc_pnet_find_rdma_dev(ndev, ini); + smc_pnet_find_rdma_dev(base_ndev, ini); return; /* pnetid could not be determined */ } _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net); diff --git a/net/socket.c b/net/socket.c index 28bae5a94234..9a0e720f0859 100644 --- a/net/socket.c +++ b/net/socket.c @@ -680,17 +680,8 @@ void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags) { u8 flags = *tx_flags; - if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) { - flags |= SKBTX_HW_TSTAMP; - - /* PTP hardware clocks can provide a free running cycle counter - * as a time base for virtual clocks. Tell driver to use the - * free running cycle counter for timestamp if socket is bound - * to virtual clock. 
- */ - if (tsflags & SOF_TIMESTAMPING_BIND_PHC) - flags |= SKBTX_HW_TSTAMP_USE_CYCLES; - } + if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) + flags |= SKBTX_HW_TSTAMP_NOBPF; if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) flags |= SKBTX_SW_TSTAMP; @@ -698,6 +689,9 @@ void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags) if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |= SKBTX_SCHED_TSTAMP; + if (tsflags & SOF_TIMESTAMPING_TX_COMPLETION) + flags |= SKBTX_COMPLETION_TSTAMP; + *tx_flags = flags; } EXPORT_SYMBOL(__sock_tx_timestamp); @@ -1145,12 +1139,10 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br, - unsigned int cmd, struct ifreq *ifr, +static int (*br_ioctl_hook)(struct net *net, unsigned int cmd, void __user *uarg); -void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, - unsigned int cmd, struct ifreq *ifr, +void brioctl_set(int (*hook)(struct net *net, unsigned int cmd, void __user *uarg)) { mutex_lock(&br_ioctl_mutex); @@ -1159,8 +1151,7 @@ void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, } EXPORT_SYMBOL(brioctl_set); -int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, - struct ifreq *ifr, void __user *uarg) +int br_ioctl_call(struct net *net, unsigned int cmd, void __user *uarg) { int err = -ENOPKG; @@ -1169,7 +1160,7 @@ int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, mutex_lock(&br_ioctl_mutex); if (br_ioctl_hook) - err = br_ioctl_hook(net, br, cmd, ifr, uarg); + err = br_ioctl_hook(net, cmd, uarg); mutex_unlock(&br_ioctl_mutex); return err; @@ -1269,7 +1260,9 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) case SIOCSIFBR: case SIOCBRADDBR: case SIOCBRDELBR: - err = br_ioctl_call(net, NULL, cmd, NULL, argp); + case SIOCBRADDIF: + case SIOCBRDELIF: + err = br_ioctl_call(net, cmd, argp); break; case SIOCGIFVLAN: 
case SIOCSIFVLAN: @@ -3429,6 +3422,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGPGRP: case SIOCBRADDBR: case SIOCBRDELBR: + case SIOCBRADDIF: + case SIOCBRDELIF: case SIOCGIFVLAN: case SIOCSIFVLAN: case SIOCGSKNS: @@ -3468,8 +3463,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGIFPFLAGS: case SIOCGIFTXQLEN: case SIOCSIFTXQLEN: - case SIOCBRADDIF: - case SIOCBRDELIF: case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 04534ea537c8..5a827afd8e3b 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -489,7 +489,7 @@ static unsigned long rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - return number_cred_unused * sysctl_vfs_cache_pressure / 100; + return number_cred_unused; } static void diff --git a/net/tipc/link.c b/net/tipc/link.c index 5c2088a469ce..50c2e0846ea4 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1951,7 +1951,6 @@ void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq) { - struct sk_buff_head *fdefq = &tnl->failover_deferdq; struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; @@ -2078,6 +2077,8 @@ tnl: tipc_link_xmit(tnl, &tnlq, xmitq); if (mtyp == FAILOVER_MSG) { + struct sk_buff_head *fdefq = &tnl->failover_deferdq; + tnl->drop_point = l->rcv_nxt; tnl->failover_reasm_skb = l->reasm_buf; l->reasm_buf = NULL; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index e50b6e71df13..f672a62a9a52 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -157,7 +157,7 @@ static void delete_all_records(struct tls_offload_context_tx *offload_ctx) offload_ctx->retransmit_hint = NULL; } -static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) +static void tls_tcp_clean_acked(struct sock *sk, u32 
acked_seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_record_info *info, *temp; @@ -204,7 +204,7 @@ void tls_device_sk_destruct(struct sock *sk) destroy_record(ctx->open_record); delete_all_records(ctx); crypto_free_aead(ctx->aead_send); - clean_acked_data_disable(inet_csk(sk)); + clean_acked_data_disable(tcp_sk(sk)); } tls_device_queue_ctx_destruction(tls_ctx); @@ -1126,7 +1126,7 @@ int tls_set_device_offload(struct sock *sk) start_marker_record->num_frags = 0; list_add_tail(&start_marker_record->list, &offload_ctx->records_list); - clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); + clean_acked_data_enable(tcp_sk(sk), &tls_tcp_clean_acked); ctx->push_pending_record = tls_device_push_pending_record; /* TLS offload is greatly simplified if we don't send @@ -1172,7 +1172,7 @@ int tls_set_device_offload(struct sock *sk) release_lock: up_read(&device_offload_lock); - clean_acked_data_disable(inet_csk(sk)); + clean_acked_data_disable(tcp_sk(sk)); crypto_free_aead(offload_ctx->aead_send); free_offload_ctx: kfree(offload_ctx); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 99ca4465f702..cb86b0bf9a53 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1057,7 +1057,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb, bool net_admin) { u16 version, cipher_type; struct tls_context *ctx; @@ -1115,7 +1115,7 @@ nla_failure: return err; } -static size_t tls_get_info_size(const struct sock *sk) +static size_t tls_get_info_size(const struct sock *sk, bool net_admin) { size_t size = 0; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f0e613d97664..f78a2492826f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -77,46 +77,37 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include 
<linux/sched/signal.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/stat.h> +#include <linux/bpf-cgroup.h> +#include <linux/btf_ids.h> #include <linux/dcache.h> -#include <linux/namei.h> -#include <linux/socket.h> -#include <linux/un.h> +#include <linux/errno.h> #include <linux/fcntl.h> +#include <linux/file.h> #include <linux/filter.h> -#include <linux/termios.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/in.h> #include <linux/fs.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <net/net_namespace.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <net/af_unix.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <net/scm.h> #include <linux/init.h> -#include <linux/poll.h> -#include <linux/rtnetlink.h> +#include <linux/kernel.h> #include <linux/mount.h> -#include <net/checksum.h> +#include <linux/namei.h> +#include <linux/poll.h> +#include <linux/proc_fs.h> +#include <linux/sched/signal.h> #include <linux/security.h> +#include <linux/seq_file.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/socket.h> #include <linux/splice.h> -#include <linux/freezer.h> -#include <linux/file.h> -#include <linux/btf_ids.h> -#include <linux/bpf-cgroup.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <net/af_unix.h> +#include <net/net_namespace.h> +#include <net/scm.h> +#include <net/tcp_states.h> +#include <uapi/linux/sockios.h> +#include <uapi/linux/termios.h> + +#include "af_unix.h" static atomic_long_t unix_nr_socks; static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; @@ -1508,7 +1499,6 @@ out: } static long unix_wait_for_peer(struct sock *other, long timeo) - __releases(&unix_sk(other)->lock) { struct unix_sock *u = unix_sk(other); int sched; diff --git a/net/unix/af_unix.h b/net/unix/af_unix.h new file mode 100644 index 000000000000..59db179df9bb --- /dev/null +++ 
b/net/unix/af_unix.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __AF_UNIX_H +#define __AF_UNIX_H + +#include <linux/uidgid.h> + +#define UNIX_HASH_MOD (256 - 1) +#define UNIX_HASH_SIZE (256 * 2) +#define UNIX_HASH_BITS 8 + +struct sock *unix_peer_get(struct sock *sk); + +struct unix_skb_parms { + struct pid *pid; /* skb credentials */ + kuid_t uid; + kgid_t gid; + struct scm_fp_list *fp; /* Passed files */ +#ifdef CONFIG_SECURITY_NETWORK + u32 secid; /* Security ID */ +#endif + u32 consumed; +} __randomize_layout; + +#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) + +/* GC for SCM_RIGHTS */ +extern unsigned int unix_tot_inflight; +void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver); +void unix_del_edges(struct scm_fp_list *fpl); +void unix_update_edges(struct unix_sock *receiver); +int unix_prepare_fpl(struct scm_fp_list *fpl); +void unix_destroy_fpl(struct scm_fp_list *fpl); +void unix_gc(void); +void wait_for_unix_gc(struct scm_fp_list *fpl); + +/* SOCK_DIAG */ +long unix_inq_len(struct sock *sk); +long unix_outq_len(struct sock *sk); + +/* sysctl */ +#ifdef CONFIG_SYSCTL +int unix_sysctl_register(struct net *net); +void unix_sysctl_unregister(struct net *net); +#else +static inline int unix_sysctl_register(struct net *net) +{ + return 0; +} + +static inline void unix_sysctl_unregister(struct net *net) +{ +} +#endif + +/* BPF SOCKMAP */ +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, int flags); +int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, int flags); + +#ifdef CONFIG_BPF_SYSCALL +extern struct proto unix_dgram_proto; +extern struct proto unix_stream_proto; + +int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); +int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); +void __init unix_bpf_build_proto(void); +#else +static inline void __init unix_bpf_build_proto(void) +{ +} +#endif 
+ +#endif diff --git a/net/unix/diag.c b/net/unix/diag.c index 9138af8b465e..79b182d0e62a 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -1,15 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/sock_diag.h> -#include <linux/unix_diag.h> -#include <linux/skbuff.h> + +#include <linux/dcache.h> #include <linux/module.h> -#include <linux/uidgid.h> -#include <net/netlink.h> +#include <linux/skbuff.h> +#include <linux/sock_diag.h> +#include <linux/types.h> +#include <linux/user_namespace.h> #include <net/af_unix.h> +#include <net/netlink.h> #include <net/tcp_states.h> -#include <net/sock.h> +#include <uapi/linux/unix_diag.h> + +#include "af_unix.h" static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 9848b7b78701..01e2b9452c75 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -63,24 +63,33 @@ * wrt receive and holding up unrelated socket operations. 
*/ -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/socket.h> -#include <linux/un.h> -#include <linux/net.h> #include <linux/fs.h> +#include <linux/list.h> #include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/file.h> -#include <linux/proc_fs.h> -#include <linux/mutex.h> -#include <linux/wait.h> - -#include <net/sock.h> +#include <linux/socket.h> +#include <linux/workqueue.h> #include <net/af_unix.h> #include <net/scm.h> #include <net/tcp_states.h> +#include "af_unix.h" + +struct unix_vertex { + struct list_head edges; + struct list_head entry; + struct list_head scc_entry; + unsigned long out_degree; + unsigned long index; + unsigned long scc_index; +}; + +struct unix_edge { + struct unix_sock *predecessor; + struct unix_sock *successor; + struct list_head vertex_entry; + struct list_head stack_entry; +}; + struct unix_sock *unix_get_socket(struct file *filp) { struct inode *inode = file_inode(filp); diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 357b3e5f3847..e02ed6e3955c 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -5,11 +5,13 @@ * Authors: Mike Shaver. 
*/ -#include <linux/mm.h> #include <linux/slab.h> +#include <linux/string.h> #include <linux/sysctl.h> - #include <net/af_unix.h> +#include <net/net_namespace.h> + +#include "af_unix.h" static struct ctl_table unix_table[] = { { diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index bca2d86ba97d..e0d30d6d22ac 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */ -#include <linux/skmsg.h> #include <linux/bpf.h> -#include <net/sock.h> +#include <linux/skmsg.h> #include <net/af_unix.h> +#include "af_unix.h" + #define unix_sk_has_data(__sk, __psock) \ ({ !skb_queue_empty(&__sk->sk_receive_queue) || \ !skb_queue_empty(&__psock->ingress_skb) || \ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 9f918b77b40e..193734b7f9dc 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,7 +6,7 @@ * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2018-2024 Intel Corporation + * Copyright 2018-2025 Intel Corporation */ #include <linux/export.h> @@ -55,11 +55,6 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, } EXPORT_SYMBOL(cfg80211_chandef_create); -static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) -{ - return nl80211_chan_width_to_mhz(c->width); -} - static u32 cfg80211_get_start_freq(const struct cfg80211_chan_def *chandef, u32 cf) { @@ -1497,6 +1492,12 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, if (cfg->reg_power == IEEE80211_REG_VLP_AP) permitting_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP; + if ((cfg->iftype == NL80211_IFTYPE_P2P_GO || + cfg->iftype == NL80211_IFTYPE_AP) && + (chandef->width == NL80211_CHAN_WIDTH_20_NOHT || + chandef->width == NL80211_CHAN_WIDTH_20)) + permitting_flags |= IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY; + return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype, check_no_ir ? 
IEEE80211_CHAN_NO_IR : 0, permitting_flags); diff --git a/net/wireless/core.c b/net/wireless/core.c index 828e29872633..9e6b31903121 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -162,11 +162,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; - wdev->netdev->netns_local = false; + wdev->netdev->netns_immutable = false; err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); if (err) break; - wdev->netdev->netns_local = true; + wdev->netdev->netns_immutable = true; } if (err) { @@ -178,11 +178,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, list) { if (!wdev->netdev) continue; - wdev->netdev->netns_local = false; + wdev->netdev->netns_immutable = false; err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); WARN_ON(err); - wdev->netdev->netns_local = true; + wdev->netdev->netns_immutable = true; } return err; @@ -546,6 +546,9 @@ use_default_name: INIT_WORK(&rdev->mgmt_registrations_update_wk, cfg80211_mgmt_registrations_update_wk); spin_lock_init(&rdev->mgmt_registrations_lock); + INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work); + INIT_LIST_HEAD(&rdev->wiphy_work_list); + spin_lock_init(&rdev->wiphy_work_lock); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -563,9 +566,6 @@ use_default_name: return NULL; } - INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work); - INIT_LIST_HEAD(&rdev->wiphy_work_list); - spin_lock_init(&rdev->wiphy_work_lock); INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); 
INIT_WORK(&rdev->event_work, cfg80211_event_work); @@ -793,6 +793,7 @@ int wiphy_register(struct wiphy *wiphy) BIT(NL80211_CHAN_WIDTH_80) | BIT(NL80211_CHAN_WIDTH_80P80) | BIT(NL80211_CHAN_WIDTH_160) | + BIT(NL80211_CHAN_WIDTH_320) | BIT(NL80211_CHAN_WIDTH_5) | BIT(NL80211_CHAN_WIDTH_10)))) return -EINVAL; @@ -1520,7 +1521,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, SET_NETDEV_DEVTYPE(dev, &wiphy_type); wdev->netdev = dev; /* can only change netns with wiphy */ - dev->netns_local = true; + dev->netns_immutable = true; cfg80211_init_wdev(wdev); break; diff --git a/net/wireless/core.h b/net/wireless/core.h index 826299f3d781..c56a35040caa 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -3,7 +3,7 @@ * Wireless configuration interface internals. * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H @@ -180,7 +180,6 @@ struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; struct rb_node rbn; - u64 ts_boottime; unsigned long ts; unsigned long refcount; atomic_t hold; @@ -569,8 +568,8 @@ void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask); int cfg80211_assoc_ml_reconf(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_assoc_link *links, - u16 rem_links); + struct cfg80211_ml_reconf_req *req); + /** * struct cfg80211_colocated_ap - colocated AP information * diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index e10f2b3b4b7f..05d44a443518 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -4,7 +4,7 @@ * * Copyright (c) 2009, Jouni Malinen <j@w1.fi> * Copyright (c) 2015 Intel Deutschland GmbH - * Copyright (C) 2019-2020, 2022-2024 Intel Corporation + * Copyright (C) 2019-2020, 2022-2025 Intel Corporation */ #include <linux/kernel.h> @@ -1297,25 +1297,24 @@ void 
cfg80211_stop_background_radar_detection(struct wireless_dev *wdev) int cfg80211_assoc_ml_reconf(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_assoc_link *links, - u16 rem_links) + struct cfg80211_ml_reconf_req *req) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; lockdep_assert_wiphy(wdev->wiphy); - err = rdev_assoc_ml_reconf(rdev, dev, links, rem_links); + err = rdev_assoc_ml_reconf(rdev, dev, req); if (!err) { int link_id; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { - if (!links[link_id].bss) + if (!req->add_links[link_id].bss) continue; - cfg80211_ref_bss(&rdev->wiphy, links[link_id].bss); - cfg80211_hold_bss(bss_from_pub(links[link_id].bss)); + cfg80211_ref_bss(&rdev->wiphy, req->add_links[link_id].bss); + cfg80211_hold_bss(bss_from_pub(req->add_links[link_id].bss)); } } @@ -1361,6 +1360,10 @@ void cfg80211_mlo_reconf_add_done(struct net_device *dev, if (data->added_links & BIT(link_id)) { wdev->links[link_id].client.current_bss = bss_from_pub(bss); + + memcpy(wdev->links[link_id].addr, + data->links[link_id].addr, + ETH_ALEN); } else { cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wiphy, bss); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index aac0e7298dc7..f039a7d0d6f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/if.h> @@ -850,6 +850,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NL80211_MAX_SUPP_SELECTORS), [NL80211_ATTR_MLO_RECONF_REM_LINKS] = { .type = NLA_U16 }, [NL80211_ATTR_EPCS] = { .type = NLA_FLAG }, + [NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -1234,6 +1235,10 
@@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY) && + nla_put_flag(msg, + NL80211_FREQUENCY_ATTR_ALLOW_20MHZ_ACTIVITY)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -2768,6 +2773,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, CMD(update_ft_ies, UPDATE_FT_IES); if (rdev->wiphy.sar_capa) CMD(set_sar_specs, SET_SAR_SPECS); + CMD(assoc_ml_reconf, ASSOC_MLO_RECONF); } #undef CMD @@ -4250,6 +4256,10 @@ static int nl80211_parse_mon_options(struct cfg80211_registered_device *rdev, change = true; } + /* MONITOR_FLAG_COOK_FRAMES is deprecated, refuse cooperation */ + if (params->flags & MONITOR_FLAG_COOK_FRAMES) + return -EOPNOTSUPP; + if (params->flags & MONITOR_FLAG_ACTIVE && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; @@ -6751,9 +6761,6 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO_U64(RX_BYTES64, rx_bytes); PUT_SINFO_U64(TX_BYTES64, tx_bytes); - PUT_SINFO(LLID, llid, u16); - PUT_SINFO(PLID, plid, u16); - PUT_SINFO(PLINK_STATE, plink_state, u8); PUT_SINFO_U64(RX_DURATION, rx_duration); PUT_SINFO_U64(TX_DURATION, tx_duration); @@ -6797,13 +6804,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(TX_RETRIES, tx_retries, u32); PUT_SINFO(TX_FAILED, tx_failed, u32); PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32); - PUT_SINFO(AIRTIME_LINK_METRIC, airtime_link_metric, u32); PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32); + + PUT_SINFO(LLID, llid, u16); + PUT_SINFO(PLID, plid, u16); + PUT_SINFO(PLINK_STATE, plink_state, u8); + PUT_SINFO(AIRTIME_LINK_METRIC, airtime_link_metric, u32); PUT_SINFO(LOCAL_PM, local_pm, u32); PUT_SINFO(PEER_PM, peer_pm, u32); 
PUT_SINFO(NONPEER_PM, nonpeer_pm, u32); PUT_SINFO(CONNECTED_TO_GATE, connected_to_gate, u8); PUT_SINFO(CONNECTED_TO_AS, connected_to_as, u8); + PUT_SINFO_U64(T_OFFSET, t_offset); if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) { bss_param = nla_nest_start_noflag(msg, @@ -6831,7 +6843,6 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, &sinfo->sta_flags)) goto nla_put_failure; - PUT_SINFO_U64(T_OFFSET, t_offset); PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc); PUT_SINFO_U64(BEACON_RX, rx_beacon); PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); @@ -10172,7 +10183,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - wdev->links[0].ap.chandef = chandef; + wdev->links[link_id].ap.chandef = chandef; break; case NL80211_IFTYPE_ADHOC: wdev->u.ibss.chandef = chandef; @@ -10520,9 +10531,9 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, intbss->parent_bssid))) goto nla_put_failure; - if (intbss->ts_boottime && + if (res->ts_boottime && nla_put_u64_64bit(msg, NL80211_BSS_LAST_SEEN_BOOTTIME, - intbss->ts_boottime, NL80211_BSS_PAD)) + res->ts_boottime, NL80211_BSS_PAD)) goto nla_put_failure; if (!nl80211_put_signal(msg, intbss->pub.chains, @@ -11374,6 +11385,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto free; } + + if (info->attrs[NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS]) + req.ext_mld_capa_ops = + nla_get_u16(info->attrs[NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS]); } else { if (req.link_id >= 0) return -EINVAL; @@ -11383,6 +11398,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(req.bss)) return PTR_ERR(req.bss); ap_addr = req.bss->bssid; + + if (info->attrs[NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS]) + return -EINVAL; } err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); @@ -16489,9 +16507,9 @@ static int 
nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_assoc_link links[IEEE80211_MLD_MAX_NUM_LINKS] = {}; + struct cfg80211_ml_reconf_req req = {}; unsigned int link_id; - u16 add_links, rem_links; + u16 add_links; int err; if (!wdev->valid_links) @@ -16507,7 +16525,7 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) add_links = 0; if (info->attrs[NL80211_ATTR_MLO_LINKS]) { - err = nl80211_process_links(rdev, links, + err = nl80211_process_links(rdev, req.add_links, /* mark as MLO, but not assoc */ IEEE80211_MLD_MAX_NUM_LINKS, NULL, 0, info); @@ -16516,33 +16534,35 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { - if (!links[link_id].bss) + if (!req.add_links[link_id].bss) continue; add_links |= BIT(link_id); } } if (info->attrs[NL80211_ATTR_MLO_RECONF_REM_LINKS]) - rem_links = + req.rem_links = nla_get_u16(info->attrs[NL80211_ATTR_MLO_RECONF_REM_LINKS]); - else - rem_links = 0; /* Validate that existing links are not added, removed links are valid * and don't allow adding and removing the same links */ - if ((add_links & rem_links) || !(add_links | rem_links) || + if ((add_links & req.rem_links) || !(add_links | req.rem_links) || (wdev->valid_links & add_links) || - ((wdev->valid_links & rem_links) != rem_links)) { + ((wdev->valid_links & req.rem_links) != req.rem_links)) { err = -EINVAL; goto out; } - err = -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS]) + req.ext_mld_capa_ops = + nla_get_u16(info->attrs[NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS]); + + err = cfg80211_assoc_ml_reconf(rdev, dev, &req); out: - for (link_id = 0; link_id < ARRAY_SIZE(links); link_id++) - cfg80211_put_bss(&rdev->wiphy, links[link_id].bss); + for 
(link_id = 0; link_id < ARRAY_SIZE(req.add_links); link_id++) + cfg80211_put_bss(&rdev->wiphy, req.add_links[link_id].bss); return err; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 759da1623342..9f4783c2354c 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2021-2024 Intel Corporation + * Copyright (C) 2018, 2021-2025 Intel Corporation */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS @@ -1551,16 +1551,14 @@ rdev_get_radio_mask(struct cfg80211_registered_device *rdev, static inline int rdev_assoc_ml_reconf(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_assoc_link *add_links, - u16 rem_links) + struct cfg80211_ml_reconf_req *req) { struct wiphy *wiphy = &rdev->wiphy; int ret = -EOPNOTSUPP; - trace_rdev_assoc_ml_reconf(wiphy, dev, add_links, rem_links); + trace_rdev_assoc_ml_reconf(wiphy, dev, req); if (rdev->ops->assoc_ml_reconf) - ret = rdev->ops->assoc_ml_reconf(wiphy, dev, add_links, - rem_links); + ret = rdev->ops->assoc_ml_reconf(wiphy, dev, req); trace_rdev_return_int(wiphy, ret); return ret; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 212e9561aae7..c1752b31734f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -5,7 +5,7 @@ * Copyright 2008-2011 Luis R. 
Rodriguez <mcgrof@qca.qualcomm.com> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2024 Intel Corporation + * Copyright (C) 2018 - 2025 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -1603,6 +1603,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_PSD; if (rd_flags & NL80211_RRF_ALLOW_6GHZ_VLP_AP) channel_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP; + if (rd_flags & NL80211_RRF_ALLOW_20MHZ_ACTIVITY) + channel_flags |= IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY; return channel_flags; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index cd2124329521..9865f305275d 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -5,7 +5,7 @@ * Copyright 2008 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/kernel.h> #include <linux/slab.h> @@ -1365,7 +1365,7 @@ void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs) { struct cfg80211_internal_bss *bss; - unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); + unsigned long age_jiffies = secs_to_jiffies(age_secs); spin_lock_bh(&rdev->bss_lock); list_for_each_entry(bss, &rdev->bss_list, list) @@ -1934,7 +1934,7 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, known->pub.signal = new->pub.signal; known->pub.capability = new->pub.capability; known->ts = new->ts; - known->ts_boottime = new->ts_boottime; + known->pub.ts_boottime = new->pub.ts_boottime; known->parent_tsf = new->parent_tsf; known->pub.chains = new->pub.chains; memcpy(known->pub.chain_signal, new->pub.chain_signal, @@ -2291,7 +2291,7 @@ cfg80211_inform_single_bss_data(struct 
wiphy *wiphy, tmp.pub.signal = 0; tmp.pub.beacon_interval = data->beacon_interval; tmp.pub.capability = data->capability; - tmp.ts_boottime = drv_data->boottime_ns; + tmp.pub.ts_boottime = drv_data->boottime_ns; tmp.parent_tsf = drv_data->parent_tsf; ether_addr_copy(tmp.parent_bssid, drv_data->parent_bssid); tmp.pub.chains = drv_data->chains; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 4f0abd5d49df..4ed9fada4ec0 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2020-2024 Intel Corporation + * Copyright (C) 2018, 2020-2025 Intel Corporation */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg80211 @@ -1378,6 +1378,7 @@ TRACE_EVENT(rdev_assoc, __dynamic_array(u8, fils_kek, req->fils_kek_len) __dynamic_array(u8, fils_nonces, req->fils_nonces ? 2 * FILS_NONCE_LEN : 0) + __field(u16, ext_mld_capa_ops) ), TP_fast_assign( WIPHY_ASSIGN; @@ -1404,6 +1405,7 @@ TRACE_EVENT(rdev_assoc, if (req->fils_nonces) memcpy(__get_dynamic_array(fils_nonces), req->fils_nonces, 2 * FILS_NONCE_LEN); + __entry->ext_mld_capa_ops = req->ext_mld_capa_ops; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", previous bssid: %pM, use mfp: %s, flags: 0x%x", @@ -4118,7 +4120,7 @@ TRACE_EVENT(cfg80211_links_removed, NETDEV_ASSIGN; __entry->link_mask = link_mask; ), - TP_printk(NETDEV_PR_FMT ", link_mask:%u", NETDEV_PR_ARG, + TP_printk(NETDEV_PR_FMT ", link_mask:0x%x", NETDEV_PR_ARG, __entry->link_mask) ); @@ -4142,14 +4144,14 @@ TRACE_EVENT(cfg80211_mlo_reconf_add_done, TRACE_EVENT(rdev_assoc_ml_reconf, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_assoc_link *add_links, - u16 rem_links), - TP_ARGS(wiphy, netdev, add_links, rem_links), + struct cfg80211_ml_reconf_req *req), + TP_ARGS(wiphy, netdev, req), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u16, add_links) __field(u16, rem_links) + __field(u16, 
ext_mld_capa_ops) ), TP_fast_assign( WIPHY_ASSIGN; @@ -4157,10 +4159,11 @@ TRACE_EVENT(rdev_assoc_ml_reconf, u32 i; __entry->add_links = 0; - __entry->rem_links = rem_links; - for (i = 0; add_links && i < IEEE80211_MLD_MAX_NUM_LINKS; i++) - if (add_links[i].bss) + __entry->rem_links = req->rem_links; + for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) + if (req->add_links[i].bss) __entry->add_links |= BIT(i); + __entry->ext_mld_capa_ops = req->ext_mld_capa_ops; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", add_links=0x%x, rem_links=0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, diff --git a/net/wireless/util.c b/net/wireless/util.c index 60157943d351..ed868c0f7ca8 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2023, 2025 Intel Corporation */ #include <linux/export.h> #include <linux/bitops.h> @@ -2908,7 +2908,7 @@ bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, u32 freq, width; freq = ieee80211_chandef_to_khz(chandef); - width = nl80211_chan_width_to_mhz(chandef->width); + width = cfg80211_chandef_get_width(chandef); if (!ieee80211_radio_freq_range_valid(radio, freq, width)) return false; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 89d2bef96469..e5d104ce7b82 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -25,6 +25,7 @@ #include <linux/vmalloc.h> #include <net/xdp_sock_drv.h> #include <net/busy_poll.h> +#include <net/netdev_lock.h> #include <net/netdev_rx_queue.h> #include <net/xdp.h> @@ -742,6 +743,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, goto free_err; } } + + if (meta->flags & XDP_TXMD_FLAGS_LAUNCH_TIME) + skb->skb_mstamp_ns = meta->request.launch_time; } } @@ -875,7 +879,7 @@ static bool xsk_no_wakeup(struct sock *sk) #ifdef CONFIG_NET_RX_BUSY_POLL /* Prefer busy-polling, 
skip the wakeup. */ return READ_ONCE(sk->sk_prefer_busy_poll) && READ_ONCE(sk->sk_ll_usec) && - READ_ONCE(sk->sk_napi_id) >= MIN_NAPI_ID; + napi_id_valid(READ_ONCE(sk->sk_napi_id)); #else return false; #endif @@ -1178,6 +1182,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) goto out_release; } + netdev_lock_ops(dev); + if (!xs->rx && !xs->tx) { err = -EINVAL; goto out_unlock; @@ -1312,6 +1318,7 @@ out_unlock: smp_wmb(); WRITE_ONCE(xs->state, XSK_BOUND); } + netdev_unlock_ops(dev); out_release: mutex_unlock(&xs->mutex); rtnl_unlock(); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index d158cb6dd391..25a76c5ce0f1 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/netdevice.h> +#include <net/netdev_lock.h> #include <net/xsk_buff_pool.h> #include <net/xdp_sock.h> #include <net/xdp_sock_drv.h> @@ -219,6 +221,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, bpf.xsk.pool = pool; bpf.xsk.queue_id = queue_id; + netdev_ops_assert_locked(netdev); err = netdev->netdev_ops->ndo_bpf(netdev, &bpf); if (err) goto err_unreg_pool; @@ -699,18 +702,56 @@ void xp_free(struct xdp_buff_xsk *xskb) } EXPORT_SYMBOL(xp_free); -void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr) +static u64 __xp_raw_get_addr(const struct xsk_buff_pool *pool, u64 addr) +{ + return pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; +} + +static void *__xp_raw_get_data(const struct xsk_buff_pool *pool, u64 addr) { - addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; return pool->addrs + addr; } + +void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr) +{ + return __xp_raw_get_data(pool, __xp_raw_get_addr(pool, addr)); +} EXPORT_SYMBOL(xp_raw_get_data); -dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) +static dma_addr_t __xp_raw_get_dma(const struct xsk_buff_pool *pool, u64 addr) { - addr = pool->unaligned ? 
xp_unaligned_add_offset_to_addr(addr) : addr; return (pool->dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) + (addr & ~PAGE_MASK); } + +dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) +{ + return __xp_raw_get_dma(pool, __xp_raw_get_addr(pool, addr)); +} EXPORT_SYMBOL(xp_raw_get_dma); + +/** + * xp_raw_get_ctx - get &xdp_desc context + * @pool: XSk buff pool desc address belongs to + * @addr: desc address (from userspace) + * + * Helper for getting desc's DMA address and metadata pointer, if present. + * Saves one call on hotpath, double calculation of the actual address, + * and inline checks for metadata presence and sanity. + * + * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata + * pointer, if it is present and valid (initialized to %NULL otherwise). + */ +struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr) +{ + struct xdp_desc_ctx ret; + + addr = __xp_raw_get_addr(pool, addr); + + ret.dma = __xp_raw_get_dma(pool, addr); + ret.meta = __xsk_buff_get_metadata(pool, __xp_raw_get_data(pool, addr)); + + return ret; +} +EXPORT_SYMBOL(xp_raw_get_ctx); diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index d1fa94e52cea..d62f76161d83 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -244,11 +244,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xfrm_address_t *daddr; bool is_packet_offload; - if (!x->type_offload) { - NL_SET_ERR_MSG(extack, "Type doesn't support offload"); - return -EINVAL; - } - if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) { NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); @@ -310,6 +305,13 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, return -EINVAL; } + xfrm_set_type_offload(x); + if (!x->type_offload) { + NL_SET_ERR_MSG(extack, "Type doesn't support offload"); + dev_put(dev); + return -EINVAL; + } + xso->dev = dev; netdev_tracker_alloc(dev, 
&xso->dev_tracker, GFP_ATOMIC); xso->real_dev = dev; @@ -332,6 +334,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, netdev_put(dev, &xso->dev_tracker); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; + xfrm_unset_type_offload(x); /* User explicitly requested packet offload mode and configured * policy in addition to the XFRM state. So be civil to users, * and return an error instead of taking fallback path. @@ -415,14 +418,12 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct net_device *dev = x->xso.dev; + bool check_tunnel_size; - if (!x->type_offload || - (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED && x->encap)) + if (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED) return false; - if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET || - ((!dev || (dev == xfrm_dst_path(dst)->dev)) && - !xdst->child->xfrm)) { + if ((dev == xfrm_dst_path(dst)->dev) && !xdst->child->xfrm) { mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; @@ -434,8 +435,29 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) return false; ok: - if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok) - return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); + check_tunnel_size = x->xso.type == XFRM_DEV_OFFLOAD_PACKET && + x->props.mode == XFRM_MODE_TUNNEL; + switch (x->props.family) { + case AF_INET: + /* Check for IPv4 options */ + if (ip_hdr(skb)->ihl != 5) + return false; + if (check_tunnel_size && xfrm4_tunnel_check_size(skb)) + return false; + break; + case AF_INET6: + /* Check for IPv6 extensions */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + if (check_tunnel_size && xfrm6_tunnel_check_size(skb)) + return false; + break; + default: + break; + } + + if (dev->xfrmdev_ops->xdo_dev_offload_ok) + return dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); return true; } diff --git 
a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index c397eb99d867..622445f041d3 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -242,10 +242,9 @@ static void xfrmi_dev_free(struct net_device *dev) gro_cells_destroy(&xi->gro_cells); } -static int xfrmi_create(struct net_device *dev) +static int xfrmi_create(struct net *net, struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - struct net *net = dev_net(dev); struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); int err; @@ -814,15 +813,17 @@ static void xfrmi_netlink_parms(struct nlattr *data[], parms->collect_md = true; } -static int xfrmi_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) +static int xfrmi_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, + struct netlink_ext_ack *extack) { - struct net *net = dev_net(dev); + struct nlattr **data = params->data; struct xfrm_if_parms p = {}; struct xfrm_if *xi; + struct net *net; int err; + net = params->link_net ? 
: dev_net(dev); xfrmi_netlink_parms(data, &p); if (p.collect_md) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); @@ -851,7 +852,7 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, xi->net = net; xi->dev = dev; - err = xfrmi_create(dev); + err = xfrmi_create(net, dev); return err; } diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 755f1eea8bfa..3b6d7284fc70 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -2625,12 +2625,10 @@ static void __iptfs_init_state(struct xfrm_state *x, struct xfrm_iptfs_data *xtfs) { __skb_queue_head_init(&xtfs->queue); - hrtimer_init(&xtfs->iptfs_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); - xtfs->iptfs_timer.function = iptfs_delay_timer; + hrtimer_setup(&xtfs->iptfs_timer, iptfs_delay_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); spin_lock_init(&xtfs->drop_lock); - hrtimer_init(&xtfs->drop_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); - xtfs->drop_timer.function = iptfs_drop_timer; + hrtimer_setup(&xtfs->drop_timer, iptfs_drop_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); /* Modify type (esp) adjustment values */ diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 3cabc87978dd..9077730ff7d0 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -827,7 +827,7 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output); -static int xfrm4_tunnel_check_size(struct sk_buff *skb) +int xfrm4_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; @@ -853,6 +853,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) out: return ret; } +EXPORT_SYMBOL_GPL(xfrm4_tunnel_check_size); static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -875,7 +876,7 @@ static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) } #if IS_ENABLED(CONFIG_IPV6) -static int xfrm6_tunnel_check_size(struct sk_buff *skb) +int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; struct dst_entry *dst = skb_dst(skb); @@ -905,6 +906,7 @@ 
static int xfrm6_tunnel_check_size(struct sk_buff *skb) out: return ret; } +EXPORT_SYMBOL_GPL(xfrm6_tunnel_check_size); #endif static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6551e588fe52..30970d40a454 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3294,7 +3294,7 @@ no_transform: ok: xfrm_pols_put(pols, drop_pols); - if (dst && dst->xfrm && + if (dst->xfrm && (dst->xfrm->props.mode == XFRM_MODE_TUNNEL || dst->xfrm->props.mode == XFRM_MODE_IPTFS)) dst->flags |= DST_XFRM_TUNNEL; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ad2202fa82f3..d896c3fefb07 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -424,18 +424,18 @@ void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, } EXPORT_SYMBOL(xfrm_unregister_type_offload); -static const struct xfrm_type_offload * -xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load) +void xfrm_set_type_offload(struct xfrm_state *x) { const struct xfrm_type_offload *type = NULL; struct xfrm_state_afinfo *afinfo; + bool try_load = true; retry: - afinfo = xfrm_state_get_afinfo(family); + afinfo = xfrm_state_get_afinfo(x->props.family); if (unlikely(afinfo == NULL)) - return NULL; + goto out; - switch (proto) { + switch (x->id.proto) { case IPPROTO_ESP: type = afinfo->type_offload_esp; break; @@ -449,18 +449,16 @@ retry: rcu_read_unlock(); if (!type && try_load) { - request_module("xfrm-offload-%d-%d", family, proto); + request_module("xfrm-offload-%d-%d", x->props.family, + x->id.proto); try_load = false; goto retry; } - return type; -} - -static void xfrm_put_type_offload(const struct xfrm_type_offload *type) -{ - module_put(type->owner); +out: + x->type_offload = type; } +EXPORT_SYMBOL(xfrm_set_type_offload); static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = { [XFRM_MODE_BEET] = { @@ -609,8 +607,6 @@ static void ___xfrm_state_destroy(struct 
xfrm_state *x) kfree(x->coaddr); kfree(x->replay_esn); kfree(x->preplay_esn); - if (x->type_offload) - xfrm_put_type_offload(x->type_offload); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -746,8 +742,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); INIT_HLIST_NODE(&x->byseq); - hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT); - x->mtimer.function = xfrm_timer_handler; + hrtimer_setup(&x->mtimer, xfrm_timer_handler, CLOCK_BOOTTIME, + HRTIMER_MODE_ABS_SOFT); timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0); x->curlft.add_time = ktime_get_real_seconds(); x->lft.soft_byte_limit = XFRM_INF; @@ -784,6 +780,8 @@ void xfrm_dev_state_free(struct xfrm_state *x) struct xfrm_dev_offload *xso = &x->xso; struct net_device *dev = READ_ONCE(xso->dev); + xfrm_unset_type_offload(x); + if (dev && dev->xfrmdev_ops) { spin_lock_bh(&xfrm_state_dev_gc_lock); if (!hlist_unhashed(&x->dev_gclist)) @@ -2315,12 +2313,12 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark, struct xfrm_hash_state_ptrs state_ptrs; struct xfrm_state *x; - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); xfrm_hash_ptrs_get(net, &state_ptrs); x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family); - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); @@ -3122,8 +3120,7 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) } EXPORT_SYMBOL_GPL(xfrm_state_mtu); -int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, - struct netlink_ext_ack *extack) +int __xfrm_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { const struct xfrm_mode *inner_mode; const struct xfrm_mode *outer_mode; @@ -3178,8 +3175,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, goto error; } - x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload); - err = 
x->type->init_state(x, extack); if (err) goto error; @@ -3192,12 +3187,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, } x->outer_mode = *outer_mode; - if (init_replay) { - err = xfrm_init_replay(x, extack); - if (err) - goto error; - } - if (x->nat_keepalive_interval) { if (x->dir != XFRM_SA_DIR_OUT) { NL_SET_ERR_MSG(extack, "NAT keepalive is only supported for outbound SAs"); @@ -3229,11 +3218,16 @@ int xfrm_init_state(struct xfrm_state *x) { int err; - err = __xfrm_init_state(x, true, false, NULL); - if (!err) - x->km.state = XFRM_STATE_VALID; + err = __xfrm_init_state(x, NULL); + if (err) + return err; - return err; + err = xfrm_init_replay(x, NULL); + if (err) + return err; + + x->km.state = XFRM_STATE_VALID; + return 0; } EXPORT_SYMBOL(xfrm_init_state); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 08c6d6f0179f..784a2d124749 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -178,6 +178,12 @@ static inline int verify_replay(struct xfrm_usersa_info *p, "Replay seq and seq_hi should be 0 for output SA"); return -EINVAL; } + if (rs->oseq_hi && !(p->flags & XFRM_STATE_ESN)) { + NL_SET_ERR_MSG( + extack, + "Replay oseq_hi should be 0 in non-ESN mode for output SA"); + return -EINVAL; + } if (rs->bmp_len) { NL_SET_ERR_MSG(extack, "Replay bmp_len should 0 for output SA"); return -EINVAL; @@ -190,6 +196,12 @@ static inline int verify_replay(struct xfrm_usersa_info *p, "Replay oseq and oseq_hi should be 0 for input SA"); return -EINVAL; } + if (rs->seq_hi && !(p->flags & XFRM_STATE_ESN)) { + NL_SET_ERR_MSG( + extack, + "Replay seq_hi should be 0 in non-ESN mode for input SA"); + return -EINVAL; + } } return 0; @@ -907,7 +919,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } - err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack); + err = __xfrm_init_state(x, extack); if (err) goto error; |