diff options
Diffstat (limited to 'net')
99 files changed, 906 insertions, 455 deletions
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 9a47ef8b95c4..1f1f5b0873b2 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -391,6 +391,7 @@ out: /** * batadv_frag_create() - create a fragment from skb + * @net_dev: outgoing device for fragment * @skb: skb to create fragment from * @frag_head: header to use in new fragment * @fragment_size: size of new fragment @@ -401,22 +402,25 @@ out: * * Return: the new fragment, NULL on error. */ -static struct sk_buff *batadv_frag_create(struct sk_buff *skb, +static struct sk_buff *batadv_frag_create(struct net_device *net_dev, + struct sk_buff *skb, struct batadv_frag_packet *frag_head, unsigned int fragment_size) { + unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev); + unsigned int tailroom = net_dev->needed_tailroom; struct sk_buff *skb_fragment; unsigned int header_size = sizeof(*frag_head); unsigned int mtu = fragment_size + header_size; - skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); + skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom); if (!skb_fragment) goto err; skb_fragment->priority = skb->priority; /* Eat the last mtu-bytes of the skb */ - skb_reserve(skb_fragment, header_size + ETH_HLEN); + skb_reserve(skb_fragment, ll_reserved + header_size); skb_split(skb, skb_fragment, skb->len - fragment_size); /* Add the header */ @@ -439,11 +443,12 @@ int batadv_frag_send_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { + struct net_device *net_dev = neigh_node->if_incoming->net_dev; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_frag_packet frag_header; struct sk_buff *skb_fragment; - unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; + unsigned int mtu = net_dev->mtu; unsigned int header_size = sizeof(frag_header); unsigned int max_fragment_size, num_fragments; int ret; @@ -503,7 +508,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto put_primary_if; } - skb_fragment = batadv_frag_create(skb, &frag_header, + skb_fragment = batadv_frag_create(net_dev, skb, &frag_header, max_fragment_size); if (!skb_fragment) { ret = -ENOMEM; @@ -522,13 +527,14 @@ int batadv_frag_send_packet(struct sk_buff *skb, frag_header.no++; } - /* Make room for the fragment header. */ - if (batadv_skb_head_push(skb, header_size) < 0 || - pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) { - ret = -ENOMEM; + /* make sure that there is at least enough head for the fragmentation + * and ethernet headers + */ + ret = skb_cow_head(skb, ETH_HLEN + header_size); + if (ret < 0) goto put_primary_if; - } + skb_push(skb, header_size); memcpy(skb->data, &frag_header, header_size); /* Send the last fragment */ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index dad99641df2a..33904595fc56 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -554,6 +554,9 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN); needed_headroom += batadv_max_header_len(); + /* fragmentation headers don't strip the unicast/... header */ + needed_headroom += sizeof(struct batadv_frag_packet); + soft_iface->needed_headroom = needed_headroom; soft_iface->needed_tailroom = lower_tailroom; } diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index a67b2b091447..c0ca5fbe5b08 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -180,6 +180,7 @@ static const struct file_operations batadv_log_fops = { .read = batadv_log_read, .poll = batadv_log_poll, .llseek = no_llseek, + .owner = THIS_MODULE, }; /** diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6f742fee874a..d3ea9d0779fb 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -177,6 +177,9 @@ static int br_dev_open(struct net_device *dev) br_stp_enable_bridge(br); br_multicast_open(br); + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + return 0; } @@ -197,6 +200,9 @@ static int br_dev_stop(struct net_device *dev) br_stp_disable_bridge(br); br_multicast_stop(br); + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_leave_snoopers(br); + netif_stop_queue(dev); return 0; @@ -207,6 +213,7 @@ static void br_get_stats64(struct net_device *dev, { struct net_bridge *br = netdev_priv(dev); + netdev_stats_to_stats64(stats, &dev->stats); dev_fetch_sw_netstats(stats, br->stats); } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index eae898c3cff7..54cb82a69056 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -3286,7 +3286,7 @@ static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br) } #endif -static void br_multicast_join_snoopers(struct net_bridge *br) +void br_multicast_join_snoopers(struct net_bridge *br) { br_ip4_multicast_join_snoopers(br); br_ip6_multicast_join_snoopers(br); @@ -3317,7 +3317,7 @@ static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br) } #endif -static void br_multicast_leave_snoopers(struct net_bridge *br) +void br_multicast_leave_snoopers(struct net_bridge *br) { br_ip4_multicast_leave_snoopers(br); br_ip6_multicast_leave_snoopers(br); @@ -3336,9 +3336,6 @@ static void __br_multicast_open(struct net_bridge *br, void br_multicast_open(struct net_bridge *br) { - if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) - br_multicast_join_snoopers(br); - __br_multicast_open(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) __br_multicast_open(br, &br->ip6_own_query); @@ -3354,9 +3351,6 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->ip6_other_query.timer); del_timer_sync(&br->ip6_own_query.timer); #endif - - if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) - br_multicast_leave_snoopers(br); } void br_multicast_dev_del(struct net_bridge *br) @@ -3487,6 +3481,7 @@ static void br_multicast_start_querier(struct net_bridge *br, int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; + bool change_snoopers = false; spin_lock_bh(&br->multicast_lock); if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val) @@ -3495,7 +3490,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) br_mc_disabled_update(br->dev, val); br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { - br_multicast_leave_snoopers(br); + change_snoopers = true; goto unlock; } @@ -3506,9 +3501,30 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) list_for_each_entry(port, &br->port_list, list) __br_multicast_enable_port(port); + change_snoopers = true; + unlock: spin_unlock_bh(&br->multicast_lock); + /* br_multicast_join_snoopers has the potential to cause + * an MLD Report/Leave to be delivered to br_multicast_rcv, + * which would in turn call br_multicast_add_group, which would + * attempt to acquire multicast_lock. This function should be + * called after the lock has been released to avoid deadlocks on + * multicast_lock. + * + * br_multicast_leave_snoopers does not have the problem since + * br_multicast_rcv first checks BROPT_MULTICAST_ENABLED, and + * returns without calling br_multicast_ipv4/6_rcv if it's not + * enabled. Moved both functions out just for symmetry. + */ + if (change_snoopers) { + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + else + br_multicast_leave_snoopers(br); + } + return 0; } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 04c3f9a82650..8edfb98ae1d5 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -735,6 +735,11 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff mtu_reserved = nf_bridge_mtu_reduction(skb); mtu = skb->dev->mtu; + if (nf_bridge->pkt_otherhost) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->pkt_otherhost = false; + } + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; @@ -835,8 +840,6 @@ static unsigned int br_nf_post_routing(void *priv, else return NF_ACCEPT; - /* We assume any code from br_dev_queue_push_xmit onwards doesn't care - * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 345118e35c42..8424464186a6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -792,6 +792,8 @@ void br_multicast_del_port(struct net_bridge_port *port); void br_multicast_enable_port(struct net_bridge_port *port); void br_multicast_disable_port(struct net_bridge_port *port); void br_multicast_init(struct net_bridge *br); +void br_multicast_join_snoopers(struct net_bridge *br); +void br_multicast_leave_snoopers(struct net_bridge *br); void br_multicast_open(struct net_bridge *br); void br_multicast_stop(struct net_bridge *br); void br_multicast_dev_del(struct net_bridge *br); @@ -969,6 +971,14 @@ static inline void br_multicast_init(struct net_bridge *br) { } +static inline void br_multicast_join_snoopers(struct net_bridge *br) +{ +} + +static inline void br_multicast_leave_snoopers(struct net_bridge *br) +{ +} + static inline void br_multicast_open(struct net_bridge *br) { } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 3e493eb85bb2..08c77418c687 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -266,8 +266,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, } masterv = br_vlan_get_master(br, v->vid, extack); - if (!masterv) + if (!masterv) { + err = -ENOMEM; goto out_filt; + } v->brvlan = masterv; if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) { v->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats); diff --git a/net/can/af_can.c b/net/can/af_can.c index 6373ab9c5507..4c343b43067f 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -541,10 +541,13 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, /* Check for bugs in CAN protocol implementations using af_can.c: * 'rcv' will be NULL if no matching list item was found for removal. + * As this case may potentially happen when closing a socket while + * the notifier for removing the CAN netdev is running we just print + * a warning here. */ if (!rcv) { - WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n", - DNAME(dev), can_id, mask); + pr_warn("can: receive list entry not found for dev %s, id %03X, mask %03X\n", + DNAME(dev), can_id, mask); goto out; } @@ -677,16 +680,25 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n", + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto free_skb; + } + + /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */ + if (unlikely(cfd->len > CAN_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d, datalen %d\n", dev->type, skb->len, cfd->len); - kfree_skb(skb); - return NET_RX_DROP; + goto free_skb; } can_receive(skb, dev); return NET_RX_SUCCESS; + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, @@ -694,16 +706,25 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n", + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto free_skb; + } + + /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */ + if (unlikely(cfd->len > CANFD_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d, datalen %d\n", dev->type, skb->len, cfd->len); - kfree_skb(skb); - return NET_RX_DROP; + goto free_skb; } can_receive(skb, dev); return NET_RX_SUCCESS; + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } /* af_can protocol functions */ diff --git a/net/can/isotp.c b/net/can/isotp.c index d78ab13bd8be..26bdc3c20b7e 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1157,6 +1157,9 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_CAN_ISOTP) return -EINVAL; + if (so->bound) + return -EISCONN; + switch (optname) { case CAN_ISOTP_OPTS: if (optlen != sizeof(struct can_isotp_options)) diff --git a/net/core/dev.c b/net/core/dev.c index 82dc6b48e45f..38412e70f761 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4180,7 +4180,7 @@ int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) } EXPORT_SYMBOL(dev_queue_xmit_accel); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { struct net_device *dev = skb->dev; struct sk_buff *orig_skb = skb; @@ -4210,17 +4210,13 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) dev_xmit_recursion_dec(); local_bh_enable(); - - if (!dev_xmit_complete(ret)) - kfree_skb(skb); - return ret; drop: atomic_long_inc(&dev->tx_dropped); kfree_skb_list(skb); return NET_XMIT_DROP; } -EXPORT_SYMBOL(dev_direct_xmit); +EXPORT_SYMBOL(__dev_direct_xmit); /************************************************************************* * Receiver routines @@ -8921,6 +8917,17 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev, return dev->xdp_state[mode].prog; } +static u8 dev_xdp_prog_count(struct net_device *dev) +{ + u8 count = 0; + int i; + + for (i = 0; i < __MAX_XDP_MODE; i++) + if (dev->xdp_state[i].prog || dev->xdp_state[i].link) + count++; + return count; +} + u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { struct bpf_prog *prog = dev_xdp_prog(dev, mode); @@ -9011,6 +9018,7 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack struct bpf_xdp_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog, u32 flags) { + unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); struct bpf_prog *cur_prog; enum bpf_xdp_mode mode; bpf_op_t bpf_op; @@ -9026,11 +9034,17 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); return -EINVAL; } - /* just one XDP mode bit should be set, zero defaults to SKB mode */ - if (hweight32(flags & XDP_FLAGS_MODES) > 1) { + /* just one XDP mode bit should be set, zero defaults to drv/skb mode */ + if (num_modes > 1) { NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); return -EINVAL; } + /* avoid ambiguity if offload + drv/skb mode progs are both loaded */ + if (!num_modes && dev_xdp_prog_count(dev) > 1) { + NL_SET_ERR_MSG(extack, + "More than one program loaded, unset mode is ambiguous"); + return -EINVAL; + } /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); diff --git a/net/core/devlink.c b/net/core/devlink.c index ab4b1368904f..8c5ddffd707d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -517,7 +517,7 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l return test_bit(limit, &devlink->ops->reload_limits); } -static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action, +static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_limit limit, u32 value) { struct nlattr *reload_stats_entry; @@ -526,8 +526,7 @@ static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_acti if (!reload_stats_entry) return -EMSGSIZE; - if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) || - nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) || + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) || nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value)) goto nla_put_failure; nla_nest_end(msg, reload_stats_entry); @@ -540,7 +539,7 @@ nla_put_failure: static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote) { - struct nlattr *reload_stats_attr; + struct nlattr *reload_stats_attr, *act_info, *act_stats; int i, j, stat_idx; u32 value; @@ -552,17 +551,29 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink if (!reload_stats_attr) return -EMSGSIZE; - for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) { - /* Remote stats are shown even if not locally supported. Stats - * of actions with unspecified limit are shown though drivers - * don't need to register unspecified limit. - */ - if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC && - !devlink_reload_limit_is_supported(devlink, j)) + for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) { + if ((!is_remote && + !devlink_reload_action_is_supported(devlink, i)) || + i == DEVLINK_RELOAD_ACTION_UNSPEC) continue; - for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) { - if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) || - i == DEVLINK_RELOAD_ACTION_UNSPEC || + act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO); + if (!act_info) + goto nla_put_failure; + + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i)) + goto action_info_nest_cancel; + act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS); + if (!act_stats) + goto action_info_nest_cancel; + + for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) { + /* Remote stats are shown even if not locally supported. + * Stats of actions with unspecified limit are shown + * though drivers don't need to register unspecified + * limit. + */ + if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC && + !devlink_reload_limit_is_supported(devlink, j)) || devlink_reload_combination_is_invalid(i, j)) continue; @@ -571,13 +582,19 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink value = devlink->stats.reload_stats[stat_idx]; else value = devlink->stats.remote_reload_stats[stat_idx]; - if (devlink_reload_stat_put(msg, i, j, value)) - goto nla_put_failure; + if (devlink_reload_stat_put(msg, j, value)) + goto action_stats_nest_cancel; } + nla_nest_end(msg, act_stats); + nla_nest_end(msg, act_info); } nla_nest_end(msg, reload_stats_attr); return 0; +action_stats_nest_cancel: + nla_nest_cancel(msg, act_stats); +action_info_nest_cancel: + nla_nest_cancel(msg, act_info); nla_put_failure: nla_nest_cancel(msg, reload_stats_attr); return -EMSGSIZE; @@ -755,6 +772,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; + /* Hold rtnl lock while accessing port's netdev attributes. */ + rtnl_lock(); spin_lock_bh(&devlink_port->type_lock); if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) goto nla_put_failure_type_locked; @@ -763,9 +782,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, devlink_port->desired_type)) goto nla_put_failure_type_locked; if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) { + struct net *net = devlink_net(devlink_port->devlink); struct net_device *netdev = devlink_port->type_dev; - if (netdev && + if (netdev && net_eq(net, dev_net(netdev)) && (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX, netdev->ifindex) || nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME, @@ -781,6 +801,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure_type_locked; } spin_unlock_bh(&devlink_port->type_lock); + rtnl_unlock(); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack)) @@ -791,6 +812,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, nla_put_failure_type_locked: spin_unlock_bh(&devlink_port->type_lock); + rtnl_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -1448,7 +1470,7 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index, pool_index, &cur, &max); if (err && err != -EOPNOTSUPP) - return err; + goto sb_occ_get_failure; if (!err) { if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) goto nla_put_failure; @@ -1461,8 +1483,10 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, return 0; nla_put_failure: + err = -EMSGSIZE; +sb_occ_get_failure: genlmsg_cancel(msg, hdr); - return -EMSGSIZE; + return err; } static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index d4474c812b64..715b67f6c62f 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -381,10 +381,8 @@ static void __flow_block_indr_cleanup(void (*release)(void *cb_priv), list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) { if (this->release == release && - this->indr.cb_priv == cb_priv) { + this->indr.cb_priv == cb_priv) list_move(&this->indr.list, cleanup_list); - return; - } } } diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index e095fb871d91..6eb2e5ec2c50 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -99,9 +99,14 @@ void gro_cells_destroy(struct gro_cells *gcells) struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); napi_disable(&cell->napi); - netif_napi_del(&cell->napi); + __netif_napi_del(&cell->napi); __skb_queue_purge(&cell->napi_skbs); } + /* This barrier is needed because netpoll could access dev->napi_list + * under rcu protection. + */ + synchronize_net(); + free_percpu(gcells->cells); gcells->cells = NULL; } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 7d3438215f32..2f7940bcf715 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -39,12 +39,11 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, { int ret; - /* Preempt disable is needed to protect per-cpu redirect_info between - * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and - * access to maps strictly require a rcu_read_lock() for protection, - * mixing with BH RCU lock doesn't work. + /* Migration disable and BH disable are needed to protect per-cpu + * redirect_info between BPF prog and skb_do_redirect(). */ - preempt_disable(); + migrate_disable(); + local_bh_disable(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); @@ -78,7 +77,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, break; } - preempt_enable(); + local_bh_enable(); + migrate_enable(); return ret; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8e39e28b0a8d..9500d28a43b0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -235,6 +235,8 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_lock(&n->lock); if ((n->nud_state == NUD_FAILED) || + (tbl->is_multicast && + tbl->is_multicast(n->primary_key)) || time_after(tref, n->updated)) remove = true; write_unlock(&n->lock); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c310c7c1cef7..960948290001 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -29,6 +29,7 @@ #include <linux/slab.h> #include <linux/export.h> #include <linux/if_vlan.h> +#include <net/dsa.h> #include <net/tcp.h> #include <net/udp.h> #include <net/addrconf.h> @@ -657,15 +658,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); int netpoll_setup(struct netpoll *np) { - struct net_device *ndev = NULL; + struct net_device *ndev = NULL, *dev = NULL; + struct net *net = current->nsproxy->net_ns; struct in_device *in_dev; int err; rtnl_lock(); - if (np->dev_name[0]) { - struct net *net = current->nsproxy->net_ns; + if (np->dev_name[0]) ndev = __dev_get_by_name(net, np->dev_name); - } + if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); err = -ENODEV; @@ -673,6 +674,19 @@ int netpoll_setup(struct netpoll *np) } dev_hold(ndev); + /* bring up DSA management network devices up first */ + for_each_netdev(net, dev) { + if (!netdev_uses_dsa(dev)) + continue; + + err = dev_change_flags(dev, dev->flags | IFF_UP, NULL); + if (err < 0) { + np_err(np, "%s failed to open %s\n", + np->dev_name, dev->name); + goto put; + } + } + if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1ba8f0163744..e578544b2cc7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4549,7 +4549,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) if (skb && (skb_next = skb_peek(q))) { icmp_next = is_icmp_err_skb(skb_next); if (icmp_next) - sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin; + sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno; } spin_unlock_irqrestore(&q->lock, flags); @@ -5786,6 +5786,9 @@ int skb_mpls_dec_ttl(struct sk_buff *skb) if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; + if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) + return -ENOMEM; + lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry); ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; if (!--ttl) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 654182ecf87b..25cdbb20f3a0 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i, struct scatterlist *sge = sk_msg_elem(msg, i); u32 len = sge->length; - if (charge) - sk_mem_uncharge(sk, len); - if (!msg->skb) + /* When the skb owns the memory we free it from consume_skb path. */ + if (!msg->skb) { + if (charge) + sk_mem_uncharge(sk, len); put_page(sg_page(sge)); + } memset(sge, 0, sizeof(*sge)); return len; } @@ -397,28 +399,45 @@ out: } EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter); -static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) +static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, + struct sk_buff *skb) { - struct sock *sk = psock->sk; - int copied = 0, num_sge; struct sk_msg *msg; + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) + return NULL; + + if (!sk_rmem_schedule(sk, skb, skb->truesize)) + return NULL; + msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); if (unlikely(!msg)) - return -EAGAIN; - if (!sk_rmem_schedule(sk, skb, skb->len)) { - kfree(msg); - return -EAGAIN; - } + return NULL; sk_msg_init(msg); + return msg; +} + +static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, + struct sk_psock *psock, + struct sock *sk, + struct sk_msg *msg) +{ + int num_sge, copied; + + /* skb linearize may fail with ENOMEM, but lets simply try again + * later if this happens. Under memory pressure we don't want to + * drop the skb. We need to linearize the skb so that the mapping + * in skb_to_sgvec can not error. + */ + if (skb_linearize(skb)) + return -EAGAIN; num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); if (unlikely(num_sge < 0)) { kfree(msg); return num_sge; } - sk_mem_charge(sk, skb->len); copied = skb->len; msg->sg.start = 0; msg->sg.size = copied; @@ -430,6 +449,48 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) return copied; } +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb); + +static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) +{ + struct sock *sk = psock->sk; + struct sk_msg *msg; + + /* If we are receiving on the same sock skb->sk is already assigned, + * skip memory accounting and owner transition seeing it already set + * correctly. + */ + if (unlikely(skb->sk == sk)) + return sk_psock_skb_ingress_self(psock, skb); + msg = sk_psock_create_ingress_msg(sk, skb); + if (!msg) + return -EAGAIN; + + /* This will transition ownership of the data from the socket where + * the BPF program was run initiating the redirect to the socket + * we will eventually receive this data on. The data will be released + * from skb_consume found in __tcp_bpf_recvmsg() after its been copied + * into user buffers. + */ + skb_set_owner_r(skb, sk); + return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); +} + +/* Puts an skb on the ingress queue of the socket already assigned to the + * skb. In this case we do not need to check memory limits or skb_set_owner_r + * because the skb is already accounted for here. + */ +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb) +{ + struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); + struct sock *sk = psock->sk; + + if (unlikely(!msg)) + return -EAGAIN; + sk_msg_init(msg); + return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); +} + static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len, bool ingress) { @@ -789,7 +850,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, * retrying later from workqueue. */ if (skb_queue_empty(&psock->ingress_skb)) { - err = sk_psock_skb_ingress(psock, skb); + err = sk_psock_skb_ingress_self(psock, skb); } if (err < 0) { skb_queue_tail(&psock->ingress_skb, skb); diff --git a/net/core/xdp.c b/net/core/xdp.c index 48aba933a5a8..d900cebc0acd 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -335,11 +335,10 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); * scenarios (e.g. queue full), it is possible to return the xdp_frame * while still leveraging this protection. The @napi_direct boolean * is used for those calls sites. Thus, allowing for faster recycling - * of xdp_frames/pages in those cases. This path is never used by the - * MEM_TYPE_XSK_BUFF_POOL memory type, so it's explicitly not part of - * the switch-statement. + * of xdp_frames/pages in those cases. */ -static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) +static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp) { struct xdp_mem_allocator *xa; struct page *page; @@ -361,6 +360,10 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) page = virt_to_page(data); /* Assumes order0 page*/ put_page(page); break; + case MEM_TYPE_XSK_BUFF_POOL: + /* NB! Only valid from an xdp_buff! */ + xsk_buff_free(xdp); + break; default: /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ WARN(1, "Incorrect XDP memory type (%d) usage", mem->type); @@ -370,19 +373,19 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) void xdp_return_frame(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, false); + __xdp_return(xdpf->data, &xdpf->mem, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, true); + __xdp_return(xdpf->data, &xdpf->mem, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); void xdp_return_buff(struct xdp_buff *xdp) { - __xdp_return(xdp->data, &xdp->rxq->mem, true); + __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); } /* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ @@ -400,18 +403,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem) } EXPORT_SYMBOL_GPL(__xdp_release_frame); -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, - struct netdev_bpf *bpf) -{ - if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) { - NL_SET_ERR_MSG(bpf->extack, - "program loaded with different flags"); - return false; - } - return true; -} -EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bb3d70664dde..b0b6e6a4784e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -427,7 +427,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); if (*own_req) ireq->ireq_opt = NULL; else diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ef4ab28cfde0..78ee1b5acf1f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -533,7 +533,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, dccp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 1fb3603d92ad..0515d6604b3b 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -628,6 +628,8 @@ int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, return ret; change_bits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); + if (change_bits > nbits) + change_bits = nbits; bitmap_from_arr32(val, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), change_bits); if (change_bits < nbits) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 687971d83b4e..922dd73e5740 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -125,6 +125,7 @@ static int arp_constructor(struct neighbour *neigh); static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); static void parp_redo(struct sk_buff *skb); +static int arp_is_multicast(const void *pkey); static const struct neigh_ops arp_generic_ops = { .family = AF_INET, @@ -156,6 +157,7 @@ struct neigh_table arp_tbl = { .key_eq = arp_key_eq, .constructor = arp_constructor, .proxy_redo = parp_redo, + .is_multicast = arp_is_multicast, .id = "arp_cache", .parms = { .tbl = &arp_tbl, @@ -928,6 +930,10 @@ static void parp_redo(struct sk_buff *skb) arp_process(dev_net(skb->dev), NULL, skb); } +static int arp_is_multicast(const void *pkey) +{ + return ipv4_is_multicast(*((__be32 *)pkey)); +} /* * Receive an arp request from the device layer. diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 86a23e4a6a50..cdf6ec5aa45d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -696,7 +696,7 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, cfg->fc_gw4 = *((__be32 *)via->rtvia_addr); break; case AF_INET6: -#ifdef CONFIG_IPV6 +#if IS_ENABLED(CONFIG_IPV6) if (alen != sizeof(struct in6_addr)) { NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA"); return -EINVAL; @@ -825,7 +825,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, if (has_gw && has_via) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); - goto errout; + return -EINVAL; } return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4148f5f78f31..f60869acbef0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -787,7 +787,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); - inet_ehash_insert(req_to_sk(req), NULL); + inet_ehash_insert(req_to_sk(req), NULL, NULL); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 366a4507b5a3..93474b1bea4e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -479,8 +479,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, r->idiag_inode = 0; if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, - inet_rsk(reqsk)->ir_mark)) + inet_rsk(reqsk)->ir_mark)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8cbe74313f38..45fb450b4522 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -20,6 +20,9 @@ #include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/inet6_hashtables.h> +#endif #include <net/secure_seq.h> #include <net/ip.h> #include <net/tcp.h> @@ -508,10 +511,52 @@ static u32 inet_sk_port_offset(const struct sock *sk) inet->inet_dport); } -/* insert a socket into ehash, and eventually remove another one - * (The another one can be a SYN_RECV or TIMEWAIT +/* Searches for an exsiting socket in the ehash bucket list. + * Returns true if found, false otherwise. */ -bool inet_ehash_insert(struct sock *sk, struct sock *osk) +static bool inet_ehash_lookup_by_sk(struct sock *sk, + struct hlist_nulls_head *list) +{ + const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num); + const int sdif = sk->sk_bound_dev_if; + const int dif = sk->sk_bound_dev_if; + const struct hlist_nulls_node *node; + struct net *net = sock_net(sk); + struct sock *esk; + + INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr); + + sk_nulls_for_each_rcu(esk, node, list) { + if (esk->sk_hash != sk->sk_hash) + continue; + if (sk->sk_family == AF_INET) { + if (unlikely(INET_MATCH(esk, net, acookie, + sk->sk_daddr, + sk->sk_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (sk->sk_family == AF_INET6) { + if (unlikely(INET6_MATCH(esk, net, + &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#endif + } + return false; +} + +/* Insert a socket into ehash, and eventually remove another one + * (The another one can be a SYN_RECV or TIMEWAIT) + * If an existing socket already exists, socket sk is not inserted, + * and sets found_dup_sk parameter to true. + */ +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; @@ -530,16 +575,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk) if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); ret = sk_nulls_del_node_init_rcu(osk); + } else if (found_dup_sk) { + *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); + if (*found_dup_sk) + ret = false; } + if (ret) __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); + return ret; } -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) { - bool ok = inet_ehash_insert(sk, osk); + bool ok = inet_ehash_insert(sk, osk, found_dup_sk); if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -583,7 +635,7 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { - inet_ehash_nolisten(sk, osk); + inet_ehash_nolisten(sk, osk, NULL); return 0; } WARN_ON(!sk_unhashed(sk)); @@ -679,7 +731,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - inet_ehash_nolisten(sk, NULL); + inet_ehash_nolisten(sk, NULL, NULL); spin_unlock_bh(&head->lock); return 0; } @@ -758,7 +810,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - inet_ehash_nolisten(sk, (struct sock *)tw); + inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1e04d2b5170..563b62b76a5f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; @@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct arpt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = table->private; + struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; void *loc_cpu_entry; @@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (!IS_ERR(t)) { struct arpt_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f15bc21d7301..6e2851f8d3a3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -258,7 +258,7 @@ ipt_do_table(struct sk_buff *skb, WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; @@ -791,7 +791,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -815,7 +815,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ipt_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -964,7 +964,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET, name); if (!IS_ERR(t)) { struct ipt_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1018,7 +1018,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1173,7 +1173,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1543,7 +1543,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index dc2a399cd9f4..9f43abeac3a8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3222,7 +3222,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.daddr = dst; fl4.saddr = src; - fl4.flowi4_tos = rtm->rtm_tos; + fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; @@ -3246,8 +3246,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_iif = iif; /* for rt_fill_info */ skb->dev = dev; skb->mark = mark; - err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, - dev, &res); + err = ip_route_input_rcu(skb, dst, src, + rtm->rtm_tos & IPTOS_RT_MASK, dev, + &res); rt = skb_rtable(skb); if (err == 0 && rt->dst.error) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 6c4d79baff26..6ea3dc2e4219 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -945,7 +945,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && - (rs->rtt_us <= bbr->min_rtt_us || + (rs->rtt_us < bbr->min_rtt_us || (filter_expired && !rs->is_ack_delayed))) { bbr->min_rtt_us = rs->rtt_us; bbr->min_rtt_stamp = tcp_jiffies32; diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 37f4cb2bba5c..bc7d2a586e18 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -15,8 +15,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, { struct iov_iter *iter = &msg->msg_iter; int peek = flags & MSG_PEEK; - int i, ret, copied = 0; struct sk_msg *msg_rx; + int i, copied = 0; msg_rx = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); @@ -37,17 +37,16 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, page = sg_page(sge); if (copied + copy > len) copy = len - copied; - ret = copy_page_to_iter(page, sge->offset, copy, iter); - if (ret != copy) { - msg_rx->sg.start = i; - return -EFAULT; - } + copy = copy_page_to_iter(page, sge->offset, copy, iter); + if (!copy) + return copied ? copied : -EFAULT; copied += copy; if (likely(!peek)) { sge->offset += copy; sge->length -= copy; - sk_mem_uncharge(sk, copy); + if (!msg_rx->skb) + sk_mem_uncharge(sk, copy); msg_rx->sg.size -= copy; if (!sge->length) { @@ -56,6 +55,11 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, put_page(page); } } else { + /* Lets not optimize peek case if copy_page_to_iter + * didn't copy the entire length lets just break. + */ + if (copy != sge->length) + return copied; sk_msg_iter_var_next(i); } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db47ac24d057..563d016e7478 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -198,6 +198,11 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_setsockopt = 1; memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (ca->flags & TCP_CONG_NEEDS_ECN) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) tcp_init_congestion_control(sk); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 389d1b340248..ef4bdb038a4b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -510,7 +510,6 @@ static void tcp_init_buffer_space(struct sock *sk) if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) tcp_sndbuf_expand(sk); - tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss); tcp_mstamp_refresh(tp); tp->rcvq_space.time = tp->tcp_mstamp; tp->rcvq_space.seq = tp->copied_seq; @@ -534,6 +533,8 @@ static void tcp_init_buffer_space(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); tp->snd_cwnd_stamp = tcp_jiffies32; + tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd, + (u32)TCP_INIT_CWND * tp->advmss); } /* 4. Recalculate window clamp after socket hit its memory bounds. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7352c097ae48..595dcc3afac5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -980,17 +980,23 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); - tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos : inet_sk(sk)->tos; - if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (inet_sk(sk)->tos & INET_ECN_MASK) : + inet_sk(sk)->tos; + + if (!INET_ECN_is_capable(tos) && + tcp_bpf_ca_needs_ecn((struct sock *)req)) + tos |= INET_ECN_ECT_0; + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, rcu_dereference(ireq->ireq_opt), - tos & ~INET_ECN_MASK); + tos); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -1498,6 +1504,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, bool *own_req) { struct inet_request_sock *ireq; + bool found_dup_sk = false; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1535,7 +1542,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = prandom_u32(); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; @@ -1575,12 +1584,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (likely(*own_req)) { tcp_move_syn(newtp, req); ireq->ireq_opt = NULL; } else { - newinet->inet_opt = NULL; + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } else { + newinet->inet_opt = NULL; + } } return newsk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bf48cd73e967..99011768c264 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1880,7 +1880,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) * window, and remember whether we were cwnd-limited then. */ if (!before(tp->snd_una, tp->max_packets_seq) || - tp->packets_out > tp->max_packets_out) { + tp->packets_out > tp->max_packets_out || + is_cwnd_limited) { tp->max_packets_out = tp->packets_out; tp->max_packets_seq = tp->snd_nxt; tp->is_cwnd_limited = is_cwnd_limited; @@ -2702,6 +2703,10 @@ repair: else tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED); + is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); + if (likely(sent_pkts || is_cwnd_limited)) + tcp_cwnd_validate(sk, is_cwnd_limited); + if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; @@ -2709,8 +2714,6 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk, false); - is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); - tcp_cwnd_validate(sk, is_cwnd_limited); return false; } return !tp->packets_out && !tcp_write_queue_empty(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 09f0a23d1a01..9eeebd4a0054 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2173,7 +2173,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, skb_transport_offset(skb)); ret = udp_queue_rcv_one_skb(sk, skb); if (ret > 0) - ip_protocol_deliver_rcu(dev_net(skb->dev), skb, -ret); + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); } return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 01146b66d666..8b6eb384bac7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5022,8 +5022,10 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return -EMSGSIZE; if (args->netnsid >= 0 && - nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) + nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || @@ -5054,8 +5056,10 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return -EMSGSIZE; if (args->netnsid >= 0 && - nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) + nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 642fc6ac13d2..8a22486cf270 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -306,7 +306,9 @@ static int ip6addrlbl_del(struct net *net, /* add default label */ static int __net_init ip6addrlbl_net_init(struct net *net) { - int err = 0; + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *n; + int err; int i; ADDRLABEL(KERN_DEBUG "%s\n", __func__); @@ -315,14 +317,20 @@ static int __net_init ip6addrlbl_net_init(struct net *net) INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { - int ret = ip6addrlbl_add(net, - ip6addrlbl_init_table[i].prefix, - ip6addrlbl_init_table[i].prefixlen, - 0, - ip6addrlbl_init_table[i].label, 0); - /* XXX: should we free all rules when we catch an error? */ - if (ret && (!err || err != -ENOMEM)) - err = ret; + err = ip6addrlbl_add(net, + ip6addrlbl_init_table[i].prefix, + ip6addrlbl_init_table[i].prefixlen, + 0, + ip6addrlbl_init_table[i].label, 0); + if (err) + goto err_ip6addrlbl_add; + } + return 0; + +err_ip6addrlbl_add: + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { + hlist_del_rcu(&p->list); + kfree_rcu(p, rcu); } return err; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index d88d97617f7e..440080da805b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -588,7 +588,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); - if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN)) + err = ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN); + if (err) goto out_free; ip6h->priority = 0; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 931b186d2e48..cf6e1380b527 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1133,8 +1133,13 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, return; if (rt->dst.dev) { - dev->needed_headroom = rt->dst.dev->hard_header_len + - t_hlen; + unsigned short dst_len = rt->dst.dev->hard_header_len + + t_hlen; + + if (t->dev->header_ops) + dev->hard_header_len = dst_len; + else + dev->needed_headroom = dst_len; if (set_mtu) { dev->mtu = rt->dst.dev->mtu - t_hlen; @@ -1159,7 +1164,12 @@ static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + + if (tunnel->dev->header_ops) + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + else + tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; + return t_hlen; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 27f29b957ee7..76717478f173 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -81,6 +81,7 @@ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); +static int ndisc_is_multicast(const void *pkey); static const struct neigh_ops ndisc_generic_ops = { .family = AF_INET6, @@ -115,6 +116,7 @@ struct neigh_table nd_tbl = { .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, .proxy_redo = pndisc_redo, + .is_multicast = ndisc_is_multicast, .allow_add = ndisc_allow_add, .id = "ndisc_cache", .parms = { @@ -1706,6 +1708,11 @@ static void pndisc_redo(struct sk_buff *skb) kfree_skb(skb); } +static int ndisc_is_multicast(const void *pkey) +{ + return ipv6_addr_is_multicast((struct in6_addr *)pkey); +} + static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e2119bfcf13..c4f532f4d311 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; @@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - struct xt_table_info *private = t->private; + struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 054d287eb13d..c129ad334eb3 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -440,6 +440,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { u16 savethdr = skb->transport_header; + u8 nexthdr = NEXTHDR_FRAGMENT; int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -455,6 +456,14 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) return 0; + /* Discard the first fragment if it does not include all headers + * RFC 8200, Section 4.5 + */ + if (ipv6frag_thdr_truncated(skb, fhoff, &nexthdr)) { + pr_debug("Drop incomplete fragment\n"); + return 0; + } + if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) return -ENOMEM; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index c8cf1bbad74a..47a0dc46cbdb 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -324,9 +324,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); - __be16 frag_off; - int iif, offset; u8 nexthdr; + int iif; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; @@ -362,24 +361,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb) * the source of the fragment, with the Pointer field set to zero. */ nexthdr = hdr->nexthdr; - offset = ipv6_skip_exthdr(skb, skb_transport_offset(skb), &nexthdr, &frag_off); - if (offset >= 0) { - /* Check some common protocols' header */ - if (nexthdr == IPPROTO_TCP) - offset += sizeof(struct tcphdr); - else if (nexthdr == IPPROTO_UDP) - offset += sizeof(struct udphdr); - else if (nexthdr == IPPROTO_ICMPV6) - offset += sizeof(struct icmp6hdr); - else - offset += 1; - - if (!(frag_off & htons(IP6_OFFSET)) && offset > skb->len) { - __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); - return -1; - } + if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) { + __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); + return -1; } iif = skb->dev ? skb->dev->ifindex : 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8db59f4e5f13..991dc36f95ff 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -527,15 +527,21 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (np->tclass & INET_ECN_MASK) : + np->tclass; + + if (!INET_ECN_is_capable(tclass) && + tcp_bpf_ca_needs_ecn((struct sock *)req)) + tclass |= INET_ECN_ECT_0; + rcu_read_lock(); opt = ireq->ipv6_opt; - tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos : np->tclass; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, - tclass & ~INET_ECN_MASK, - sk->sk_priority); + tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -1193,6 +1199,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; + bool found_dup_sk = false; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG @@ -1314,7 +1321,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (np->repflow) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; @@ -1368,7 +1377,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (*own_req) { tcp_move_syn(newtp, req); @@ -1383,6 +1393,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * skb_set_owner_r(newnp->pktoptions, newsk); } } + } else { + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } } return newsk; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 047238f01ba6..db7d888914fa 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1645,7 +1645,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); + nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0); if (!nsk) { err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); @@ -1851,7 +1851,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) goto out; } - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); + nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0); bh_lock_sock(sk); if ((sk->sk_state != IUCV_LISTEN) || sk_acceptq_is_full(sk) || diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1be775979132..44154cc596cd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -948,6 +948,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + set_bit(SDATA_STATE_RUNNING, &sdata->state); + ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR); if (ret) { kfree(sdata); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 48f31ac9233c..620ecf922408 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -60,6 +60,7 @@ static struct mesh_table *mesh_table_alloc(void) atomic_set(&newtbl->entries, 0); spin_lock_init(&newtbl->gates_lock); spin_lock_init(&newtbl->walk_lock); + rhashtable_init(&newtbl->rhead, &mesh_rht_params); return newtbl; } @@ -773,9 +774,6 @@ int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) goto free_path; } - rhashtable_init(&tbl_path->rhead, &mesh_rht_params); - rhashtable_init(&tbl_mpp->rhead, &mesh_rht_params); - sdata->u.mesh.mesh_paths = tbl_path; sdata->u.mesh.mpp_paths = tbl_mpp; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 86bc469a28bc..b13b1da19386 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -274,7 +274,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, success = !!(info->flags & IEEE80211_TX_STAT_ACK); for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - if (ar[i].idx < 0) + if (ar[i].idx < 0 || !ar[i].count) break; ndx = rix_to_ndx(mi, ar[i].idx); @@ -287,12 +287,6 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->r[ndx].stats.success += success; } - if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0)) - mi->sample_packets++; - - if (mi->sample_deferred > 0) - mi->sample_deferred--; - if (time_after(jiffies, mi->last_stats_update + mp->update_interval / (mp->new_avg ? 2 : 1))) minstrel_update_stats(mp, mi); @@ -367,7 +361,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, return; delta = (mi->total_packets * sampling_ratio / 100) - - (mi->sample_packets + mi->sample_deferred / 2); + mi->sample_packets; /* delta < 0: no sampling required */ prev_sample = mi->prev_sample; @@ -376,7 +370,6 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, return; if (mi->total_packets >= 10000) { - mi->sample_deferred = 0; mi->sample_packets = 0; mi->total_packets = 0; } else if (delta > mi->n_rates * 2) { @@ -401,19 +394,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, * rate sampling method should be used. * Respect such rates that are not sampled for 20 interations. */ - if (mrr_capable && - msr->perfect_tx_time > mr->perfect_tx_time && - msr->stats.sample_skipped < 20) { - /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark - * packets that have the sampling rate deferred to the - * second MRR stage. Increase the sample counter only - * if the deferred sample rate was actually used. - * Use the sample_deferred counter to make sure that - * the sampling is not done in large bursts */ - info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; - rate++; - mi->sample_deferred++; - } else { + if (msr->perfect_tx_time < mr->perfect_tx_time || + msr->stats.sample_skipped >= 20) { if (!msr->sample_limit) return; @@ -433,6 +415,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, rate->idx = mi->r[ndx].rix; rate->count = minstrel_get_retry_count(&mi->r[ndx], info); + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index dbb43bcd3c45..86cd80b3ffde 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -126,7 +126,6 @@ struct minstrel_sta_info { u8 max_prob_rate; unsigned int total_packets; unsigned int sample_packets; - int sample_deferred; unsigned int sample_row; unsigned int sample_column; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4fe284ff1ea3..ec6973ee88ef 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -705,7 +705,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_drop_sta: local->num_sta--; synchronize_net(); - __cleanup_single_sta(sta); + cleanup_single_sta(sta); out_err: mutex_unlock(&local->sta_mtx); kfree(sinfo); @@ -724,19 +724,13 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) err = sta_info_insert_check(sta); if (err) { + sta_info_free(local, sta); mutex_unlock(&local->sta_mtx); rcu_read_lock(); - goto out_free; + return err; } - err = sta_info_insert_finish(sta); - if (err) - goto out_free; - - return 0; - out_free: - sta_info_free(local, sta); - return err; + return sta_info_insert_finish(sta); } int sta_info_insert(struct sta_info *sta) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 6feb45135020..3485610755ef 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -49,7 +49,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, int ac; if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_CTL_AMPDU)) { + IEEE80211_TX_CTL_AMPDU | + IEEE80211_TX_CTL_HW_80211_ENCAP)) { ieee80211_free_txskb(&local->hw, skb); return; } @@ -915,15 +916,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, ieee80211_mpsp_trigger_process( ieee80211_get_qos_ctl(hdr), sta, true, acked); - if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) { - /* - * The STA is in power save mode, so assume - * that this TX packet failed because of that. - */ - ieee80211_handle_filtered_frame(local, sta, skb); - return; - } - if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) @@ -1150,6 +1142,12 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, -info->status.ack_signal); } } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) { + /* + * The STA is in power save mode, so assume + * that this TX packet failed because of that. + */ + if (skb) + ieee80211_handle_filtered_frame(local, sta, skb); return; } else if (noack_success) { /* nothing to do here, do not account as lost */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 49342060490f..94e624e9439b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3455,7 +3455,7 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, *chandef = he_chandef; - return false; + return true; } bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper, diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 84d119436b22..b921cbdd9aaa 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -67,6 +67,7 @@ void mptcp_seq_show(struct seq_file *seq) for (i = 0; mptcp_snmp_list[i].name; i++) seq_puts(seq, " 0"); + seq_putc(seq, '\n'); return; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ac4a1fe3550b..953906e40742 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -543,9 +543,8 @@ create_msk: fallback = true; } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); - if (!mp_opt.mp_join || - !mptcp_can_accept_new_subflow(subflow_req->msk) || - !subflow_hmac_valid(req, &mp_opt)) { + if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || + !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); fallback = true; } diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index f1be3e3f6425..a9cb355324d1 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1726,9 +1726,6 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, ndp->ptype.dev = dev; dev_add_pack(&ndp->ptype); - /* Set up generic netlink interface */ - ncsi_init_netlink(dev); - pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; @@ -1892,8 +1889,6 @@ void ncsi_unregister_dev(struct ncsi_dev *nd) list_del_rcu(&ndp->node); spin_unlock_irqrestore(&ncsi_dev_lock, flags); - ncsi_unregister_netlink(nd->dev); - kfree(ndp); } EXPORT_SYMBOL_GPL(ncsi_unregister_dev); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index adddc7707aa4..bb5f1650f11c 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -766,24 +766,8 @@ static struct genl_family ncsi_genl_family __ro_after_init = { .n_small_ops = ARRAY_SIZE(ncsi_ops), }; -int ncsi_init_netlink(struct net_device *dev) +static int __init ncsi_init_netlink(void) { - int rc; - - rc = genl_register_family(&ncsi_genl_family); - if (rc) - netdev_err(dev, "ncsi: failed to register netlink family\n"); - - return rc; -} - -int ncsi_unregister_netlink(struct net_device *dev) -{ - int rc; - - rc = genl_unregister_family(&ncsi_genl_family); - if (rc) - netdev_err(dev, "ncsi: failed to unregister netlink family\n"); - - return rc; + return genl_register_family(&ncsi_genl_family); } +subsys_initcall(ncsi_init_netlink); diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h index 7502723fba83..39a1a9d7bf77 100644 --- a/net/ncsi/ncsi-netlink.h +++ b/net/ncsi/ncsi-netlink.h @@ -22,7 +22,4 @@ int ncsi_send_netlink_err(struct net_device *dev, struct nlmsghdr *nlhdr, int err); -int ncsi_init_netlink(struct net_device *dev); -int ncsi_unregister_netlink(struct net_device *dev); - #endif /* __NCSI_NETLINK_H__ */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 7cff6e5e7445..2b19189a930f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -271,8 +271,7 @@ flag_nested(const struct nlattr *nla) static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, - [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, - .len = sizeof(struct in6_addr) }, + [IPSET_ATTR_IPADDR_IPV6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), }; int diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e279ded4e306..d45dbcba8b49 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4167,12 +4167,18 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) spin_lock_init(&ipvs->tot_stats.lock); - proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops, - sizeof(struct ip_vs_iter)); - proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, - ip_vs_stats_show, NULL); - proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net, - ip_vs_stats_percpu_show, NULL); +#ifdef CONFIG_PROC_FS + if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net, + &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter))) + goto err_vs; + if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, + ip_vs_stats_show, NULL)) + goto err_stats; + if (!proc_create_net_single("ip_vs_stats_percpu", 0, + ipvs->net->proc_net, + ip_vs_stats_percpu_show, NULL)) + goto err_percpu; +#endif if (ip_vs_control_net_init_sysctl(ipvs)) goto err; @@ -4180,6 +4186,17 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) return 0; err: +#ifdef CONFIG_PROC_FS + remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); + +err_percpu: + remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); + +err_stats: + remove_proc_entry("ip_vs", ipvs->net->proc_net); + +err_vs: +#endif free_percpu(ipvs->tot_stats.cpustats); return -ENOMEM; } @@ -4188,9 +4205,11 @@ void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) { ip_vs_trash_cleanup(ipvs); ip_vs_control_net_cleanup_sysctl(ipvs); +#ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); remove_proc_entry("ip_vs", ipvs->net->proc_net); +#endif free_percpu(ipvs->tot_stats.cpustats); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0f58e98542be..9a080767667b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -619,7 +619,8 @@ static int nft_request_module(struct net *net, const char *fmt, ...) static void lockdep_nfnl_nft_mutex_not_held(void) { #ifdef CONFIG_PROVE_LOCKING - WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); + if (debug_locks) + WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); #endif } @@ -1722,6 +1723,10 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, } nla_strlcpy(ifname, attr, IFNAMSIZ); + /* nf_tables_netdev_event() is called under rtnl_mutex, this is + * indirectly serializing all the other holders of the commit_mutex with + * the rtnl_mutex. + */ dev = __dev_get_by_name(net, ifname); if (!dev) { err = -ENOENT; @@ -3718,7 +3723,7 @@ cont: return 0; } -static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) +int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) { u64 ms = be64_to_cpu(nla_get_be64(nla)); u64 max = (u64)(~((u64)0)); @@ -3732,7 +3737,7 @@ static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) return 0; } -static __be64 nf_jiffies64_to_msecs(u64 input) +__be64 nf_jiffies64_to_msecs(u64 input) { return cpu_to_be64(jiffies64_to_msecs(input)); } diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 9f625724a20f..9ae14270c543 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -28,6 +28,23 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) return flow; } +void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, + enum flow_dissector_key_id addr_type) +{ + struct nft_flow_match *match = &flow->match; + struct nft_flow_key *mask = &match->mask; + struct nft_flow_key *key = &match->key; + + if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) + return; + + key->control.addr_type = addr_type; + mask->control.addr_type = 0xffff; + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL); + match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] = + offsetof(struct nft_flow_key, control); +} + struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule) { diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index bc079d68a536..00e563a72d3d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -123,11 +123,11 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, u8 *mask = (u8 *)&flow->match.mask; u8 *key = (u8 *)&flow->match.key; - if (priv->op != NFT_CMP_EQ || reg->len != priv->len) + if (priv->op != NFT_CMP_EQ || priv->len > reg->len) return -EOPNOTSUPP; - memcpy(key + reg->offset, &priv->data, priv->len); - memcpy(mask + reg->offset, ®->mask, priv->len); + memcpy(key + reg->offset, &priv->data, reg->len); + memcpy(mask + reg->offset, ®->mask, reg->len); flow->match.dissector.used_keys |= BIT(reg->key); flow->match.dissector.offset[reg->key] = reg->base_offset; @@ -137,7 +137,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, nft_reg_load16(priv->data.data) != ARPHRD_ETHER) return -EOPNOTSUPP; - nft_offload_update_dependency(ctx, &priv->data, priv->len); + nft_offload_update_dependency(ctx, &priv->data, reg->len); return 0; } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 322bd674963e..a1b0aac46e9e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -177,8 +177,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr, } #endif case NFT_CT_ID: - if (!nf_ct_is_confirmed(ct)) - goto err; *dest = nf_ct_get_id(ct); return; default: diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 64ca13a1885b..9af4f93c7f0e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -157,8 +157,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( - tb[NFTA_DYNSET_TIMEOUT]))); + + err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); + if (err) + return err; } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); @@ -267,7 +269,7 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, - cpu_to_be64(jiffies_to_msecs(priv->timeout)), + nf_jiffies64_to_msecs(priv->timeout), NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index b37bd02448d8..bf4b3ad5314c 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -724,22 +724,22 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx, switch (priv->key) { case NFT_META_PROTOCOL: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto, - sizeof(__u16), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto, + sizeof(__u16), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case NFT_META_L4PROTO: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, - sizeof(__u8), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, + sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); break; case NFT_META_IIF: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta, - ingress_ifindex, sizeof(__u32), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, + ingress_ifindex, sizeof(__u32), reg); break; case NFT_META_IIFTYPE: - NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta, - ingress_iftype, sizeof(__u16), reg); + NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, + ingress_iftype, sizeof(__u16), reg); break; default: return -EOPNOTSUPP; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index dcd3c7b8a367..47d4e0e21651 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -165,6 +165,34 @@ nla_put_failure: return -1; } +static bool nft_payload_offload_mask(struct nft_offload_reg *reg, + u32 priv_len, u32 field_len) +{ + unsigned int remainder, delta, k; + struct nft_data mask = {}; + __be32 remainder_mask; + + if (priv_len == field_len) { + memset(®->mask, 0xff, priv_len); + return true; + } else if (priv_len > field_len) { + return false; + } + + memset(&mask, 0xff, field_len); + remainder = priv_len % sizeof(u32); + if (remainder) { + k = priv_len / sizeof(u32); + delta = field_len - priv_len; + remainder_mask = htonl(~((1 << (delta * BITS_PER_BYTE)) - 1)); + mask.data[k] = (__force u32)remainder_mask; + } + + memcpy(®->mask, &mask, field_len); + + return true; +} + static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_payload *priv) @@ -173,21 +201,21 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ethhdr, h_source): - if (priv->len != ETH_ALEN) + if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN)) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, src, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_dest): - if (priv->len != ETH_ALEN) + if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN)) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs, dst, ETH_ALEN, reg); break; case offsetof(struct ethhdr, h_proto): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, @@ -195,14 +223,14 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case offsetof(struct vlan_ethhdr, h_vlan_TCI): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan, vlan_tci, sizeof(__be16), reg); break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan, @@ -210,7 +238,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case offsetof(struct vlan_ethhdr, h_vlan_TCI) + sizeof(struct vlan_hdr): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, @@ -218,7 +246,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx, break; case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) + sizeof(struct vlan_hdr): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan, @@ -239,21 +267,25 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct iphdr, saddr): - if (priv->len != sizeof(struct in_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src, sizeof(struct in_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, daddr): - if (priv->len != sizeof(struct in_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst, sizeof(struct in_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS); break; case offsetof(struct iphdr, protocol): - if (priv->len != sizeof(__u8)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, @@ -275,21 +307,25 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct ipv6hdr, saddr): - if (priv->len != sizeof(struct in6_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in6_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src, sizeof(struct in6_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, daddr): - if (priv->len != sizeof(struct in6_addr)) + if (!nft_payload_offload_mask(reg, priv->len, + sizeof(struct in6_addr))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst, sizeof(struct in6_addr), reg); + nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS); break; case offsetof(struct ipv6hdr, nexthdr): - if (priv->len != sizeof(__u8)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, @@ -331,14 +367,14 @@ static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct tcphdr, source): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct tcphdr, dest): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, @@ -359,14 +395,14 @@ static int nft_payload_offload_udp(struct nft_offload_ctx *ctx, switch (priv->offset) { case offsetof(struct udphdr, source): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src, sizeof(__be16), reg); break; case offsetof(struct udphdr, dest): - if (priv->len != sizeof(__be16)) + if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16))) return -EOPNOTSUPP; NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index af22dbe85e2c..acce622582e3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1349,6 +1349,14 @@ struct xt_counters *xt_counters_alloc(unsigned int counters) } EXPORT_SYMBOL(xt_counters_alloc); +struct xt_table_info +*xt_table_get_private_protected(const struct xt_table *table) +{ + return rcu_dereference_protected(table->private, + mutex_is_locked(&xt[table->af].mutex)); +} +EXPORT_SYMBOL(xt_table_get_private_protected); + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, @@ -1356,7 +1364,6 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; - unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); @@ -1366,47 +1373,20 @@ xt_replace_table(struct xt_table *table, } /* Do the substitution. */ - local_bh_disable(); - private = table->private; + private = xt_table_get_private_protected(table); /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { pr_debug("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - local_bh_enable(); *error = -EAGAIN; return NULL; } newinfo->initial_entries = private->initial_entries; - /* - * Ensure contents of newinfo are visible before assigning to - * private. - */ - smp_wmb(); - table->private = newinfo; - - /* make sure all cpus see new ->private value */ - smp_wmb(); - /* - * Even though table entries have now been swapped, other CPU's - * may still be using the old entries... - */ - local_bh_enable(); - - /* ... so wait for even xt_recseq on all cpus */ - for_each_possible_cpu(cpu) { - seqcount_t *s = &per_cpu(xt_recseq, cpu); - u32 seq = raw_read_seqcount(s); - - if (seq & 1) { - do { - cond_resched(); - cpu_relax(); - } while (seq == raw_read_seqcount(s)); - } - } + rcu_assign_pointer(table->private, newinfo); + synchronize_rcu(); audit_log_nfcfg(table->name, table->af, private->number, !private->number ? AUDIT_XT_OP_REGISTER : @@ -1442,12 +1422,12 @@ struct xt_table *xt_register_table(struct net *net, } /* Simplifies replace_table code. */ - table->private = bootstrap; + rcu_assign_pointer(table->private, bootstrap); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; - private = table->private; + private = xt_table_get_private_protected(table); pr_debug("table->private->number = %u\n", private->number); /* save number of initial entries */ @@ -1470,7 +1450,8 @@ void *xt_unregister_table(struct xt_table *table) struct xt_table_info *private; mutex_lock(&xt[table->af].mutex); - private = table->private; + private = xt_table_get_private_protected(table); + RCU_INIT_POINTER(table->private, NULL); list_del(&table->list); mutex_unlock(&xt[table->af].mutex); audit_log_nfcfg(table->name, table->af, private->number, diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index fc55c9116da0..ccb491642811 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1167,7 +1167,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; u32 skip_addr4 = cb->args[2]; - u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0; + u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b87bfc82f44f..c3a664871cb5 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -199,6 +199,9 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, __be32 lse; int err; + if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) + return -ENOMEM; + stack = mpls_hdr(skb); lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask); err = skb_mpls_update_lse(skb, lse); @@ -958,14 +961,13 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, { /* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. */ struct nlattr *dec_ttl_arg = nla_data(attr); - int rem = nla_len(attr); if (nla_len(dec_ttl_arg)) { - struct nlattr *actions = nla_next(dec_ttl_arg, &rem); + struct nlattr *actions = nla_data(dec_ttl_arg); if (actions) - return clone_execute(dp, skb, key, 0, actions, rem, - last, false); + return clone_execute(dp, skb, key, 0, nla_data(actions), + nla_len(actions), last, false); } consume_skb(skb); return 0; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 9d3e50c4d29f..4c5c2331e764 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2503,28 +2503,42 @@ static int validate_and_copy_dec_ttl(struct net *net, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, bool log) { - int start, err; - u32 nested = true; + const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; + int start, action_start, err, rem; + const struct nlattr *a, *actions; + + memset(attrs, 0, sizeof(attrs)); + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); - if (!nla_len(attr)) - return ovs_nla_add_action(sfa, OVS_ACTION_ATTR_DEC_TTL, - NULL, 0, log); + /* Ignore unknown attributes to be future proof. */ + if (type > OVS_DEC_TTL_ATTR_MAX) + continue; + + if (!type || attrs[type]) + return -EINVAL; + + attrs[type] = a; + } + + actions = attrs[OVS_DEC_TTL_ATTR_ACTION]; + if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) + return -EINVAL; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log); if (start < 0) return start; - err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ACTION, &nested, - sizeof(nested), log); + action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log); + if (action_start < 0) + return action_start; - if (err) - return err; - - err = __ovs_nla_copy_actions(net, attr, key, sfa, eth_type, + err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, vlan_tci, mpls_label_count, log); if (err) return err; + add_nested_action_end(*sfa, action_start); add_nested_action_end(*sfa, start); return 0; } @@ -3487,20 +3501,42 @@ out: static int dec_ttl_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { - int err = 0, rem = nla_len(attr); - struct nlattr *start; + struct nlattr *start, *action_start; + const struct nlattr *a; + int err = 0, rem; start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL); - if (!start) return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(attr), rem, skb); - if (err) - nla_nest_cancel(skb, start); - else - nla_nest_end(skb, start); + nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { + switch (nla_type(a)) { + case OVS_DEC_TTL_ATTR_ACTION: + + action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION); + if (!action_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) + goto out; + + nla_nest_end(skb, action_start); + break; + default: + /* Ignore all other option to be future compatible */ + break; + } + } + + nla_nest_end(skb, start); + return 0; + +out: + nla_nest_cancel(skb, start); return err; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index cefbd50c1090..7a18ffff8551 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,8 +93,8 @@ /* Assumptions: - - If the device has no dev->header_ops, there is no LL header visible - above the device. In this case, its hard_header_len should be 0. + - If the device has no dev->header_ops->create, there is no LL header + visible above the device. In this case, its hard_header_len should be 0. The device may prepend its own header internally. In this case, its needed_headroom should be set to the space needed for it to add its internal header. @@ -108,26 +108,26 @@ On receive: ----------- -Incoming, dev->header_ops != NULL +Incoming, dev_has_header(dev) == true mac_header -> ll header data -> data -Outgoing, dev->header_ops != NULL +Outgoing, dev_has_header(dev) == true mac_header -> ll header data -> ll header -Incoming, dev->header_ops == NULL +Incoming, dev_has_header(dev) == false mac_header -> data However drivers often make it point to the ll header. This is incorrect because the ll header should be invisible to us. data -> data -Outgoing, dev->header_ops == NULL +Outgoing, dev_has_header(dev) == false mac_header -> data. ll header is invisible to us. data -> data Resume - If dev->header_ops == NULL we are unable to restore the ll header, + If dev_has_header(dev) == false we are unable to restore the ll header, because it is invisible to us. @@ -2069,7 +2069,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; - if (dev->header_ops) { + if (dev_has_header(dev)) { /* The device has an explicit notion of ll header, * exported to higher levels. * @@ -2198,7 +2198,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; - if (dev->header_ops) { + if (dev_has_header(dev)) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 971c73c7d34c..97101c55763d 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -876,6 +876,9 @@ static int rfkill_resume(struct device *dev) rfkill->suspended = false; + if (!rfkill->registered) + return 0; + if (!rfkill->persistent) { cur = !!(rfkill->state & RFKILL_BLOCK_SW); rfkill_set_block(rfkill, cur); diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 7b094275ea8b..11c45c8c6c16 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -96,10 +96,19 @@ static void rose_loopback_timer(struct timer_list *unused) } if (frametype == ROSE_CALL_REQUEST) { - if ((dev = rose_dev_get(dest)) != NULL) { - if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) - kfree_skb(skb); - } else { + if (!rose_loopback_neigh->dev) { + kfree_skb(skb); + continue; + } + + dev = rose_dev_get(dest); + if (!dev) { + kfree_skb(skb); + continue; + } + + if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) { + dev_put(dev); kfree_skb(skb); } } else { diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 5c7456e5b5cf..d1486ea496a2 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -105,6 +105,9 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_MPLS_ACT_MODIFY: + if (!pskb_may_pull(skb, + skb_network_offset(skb) + MPLS_HLEN)) + goto drop; new_lse = tcf_mpls_get_lse(mpls_hdr(skb), p, false); if (skb_mpls_update_lse(skb, new_lse)) goto drop; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fed18fd2c50b..1319986693fc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2424,8 +2424,8 @@ static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb, return err; } if (lse_mask->mpls_label) { - err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, - lse_key->mpls_label); + err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, + lse_key->mpls_label); if (err) return err; } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 4dda15588cf4..949163fe68af 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -401,6 +401,7 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); + timer_setup(&q->adapt_timer, fq_pie_timer, 0); if (opt) { err = fq_pie_change(sch, opt, extack); @@ -426,7 +427,6 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, pie_vars_init(&flow->vars); } - timer_setup(&q->adapt_timer, fq_pie_timer, 0); mod_timer(&q->adapt_timer, jiffies + HZ / 2); return 0; diff --git a/net/sctp/input.c b/net/sctp/input.c index 55d4fc6f371d..d508f6f3dd08 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -449,7 +449,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, else { if (!mod_timer(&t->proto_unreach_timer, jiffies + (HZ/20))) - sctp_association_hold(asoc); + sctp_transport_hold(t); } } else { struct net *net = sock_net(sk); @@ -458,7 +458,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, "encountered!\n", __func__); if (del_timer(&t->proto_unreach_timer)) - sctp_association_put(asoc); + sctp_transport_put(t); sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 813d30767204..0948f14ce221 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -419,7 +419,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t) /* Try again later. */ if (!mod_timer(&transport->proto_unreach_timer, jiffies + (HZ/20))) - sctp_association_hold(asoc); + sctp_transport_hold(transport); goto out_unlock; } @@ -435,7 +435,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t) out_unlock: bh_unlock_sock(sk); - sctp_association_put(asoc); + sctp_transport_put(transport); } /* Handle the timeout of the RE-CONFIG timer. */ diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 806af58f4375..60fcf31cdcfb 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -133,7 +133,7 @@ void sctp_transport_free(struct sctp_transport *transport) /* Delete the ICMP proto unreachable timer if it's active. */ if (del_timer(&transport->proto_unreach_timer)) - sctp_association_put(transport->asoc); + sctp_transport_put(transport); sctp_transport_put(transport); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e9f487c8c6d5..5dd4faaf7d6e 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -979,7 +979,8 @@ static int __smc_connect(struct smc_sock *smc) /* check if smc modes and versions of CLC proposal and accept match */ rc = smc_connect_check_aclc(ini, aclc); - version = aclc->hdr.version == SMC_V1 ? SMC_V1 : version; + version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2; + ini->smcd_version = version; if (rc) goto vlan_cleanup; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2b19863f7171..af96f813c075 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1309,7 +1309,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) ini->ism_peer_gid[ini->ism_selected]) : smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && !lgr->sync_err && - lgr->vlan_id == ini->vlan_id && + (ini->smcd_version == SMC_V2 || + lgr->vlan_id == ini->vlan_id) && (role == SMC_CLNT || ini->is_smcd || lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { /* link group found */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 1c314dbdc7fa..fc766b537ac7 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -198,9 +198,9 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, rcu_read_lock(); ndev = rdma_read_gid_attr_ndev_rcu(attr); if (!IS_ERR(ndev) && - ((!vlan_id && !is_vlan_dev(attr->ndev)) || - (vlan_id && is_vlan_dev(attr->ndev) && - vlan_dev_vlan_id(attr->ndev) == vlan_id)) && + ((!vlan_id && !is_vlan_dev(ndev)) || + (vlan_id && is_vlan_dev(ndev) && + vlan_dev_vlan_id(ndev) == vlan_id)) && attr->gid_type == IB_GID_TYPE_ROCE) { rcu_read_unlock(); if (gid) diff --git a/net/tipc/node.c b/net/tipc/node.c index d269ebe382e1..83978d5dae59 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2181,7 +2181,11 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, &xmitq); else if (prop == TIPC_NLA_PROP_MTU) tipc_link_set_mtu(e->link, b->mtu); + + /* Update MTU for node link entry */ + e->mtu = tipc_link_mss(e->link); } + tipc_node_write_unlock(n); tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL); } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index cec86229a6a0..a3ab2d3d4e4e 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -694,36 +694,51 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx, static bool tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async, - s64 resync_req, u32 *seq) + s64 resync_req, u32 *seq, u16 *rcd_delta) { u32 is_async = resync_req & RESYNC_REQ_ASYNC; u32 req_seq = resync_req >> 32; u32 req_end = req_seq + ((resync_req >> 16) & 0xffff); + u16 i; + + *rcd_delta = 0; if (is_async) { + /* shouldn't get to wraparound: + * too long in async stage, something bad happened + */ + if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) + return false; + /* asynchronous stage: log all headers seq such that * req_seq <= seq <= end_seq, and wait for real resync request */ - if (between(*seq, req_seq, req_end) && + if (before(*seq, req_seq)) + return false; + if (!after(*seq, req_end) && resync_async->loglen < TLS_DEVICE_RESYNC_ASYNC_LOGMAX) resync_async->log[resync_async->loglen++] = *seq; + resync_async->rcd_delta++; + return false; } /* synchronous stage: check against the logged entries and * proceed to check the next entries if no match was found */ - while (resync_async->loglen) { - if (req_seq == resync_async->log[resync_async->loglen - 1] && - atomic64_try_cmpxchg(&resync_async->req, - &resync_req, 0)) { - resync_async->loglen = 0; + for (i = 0; i < resync_async->loglen; i++) + if (req_seq == resync_async->log[i] && + atomic64_try_cmpxchg(&resync_async->req, &resync_req, 0)) { + *rcd_delta = resync_async->rcd_delta - i; *seq = req_seq; + resync_async->loglen = 0; + resync_async->rcd_delta = 0; return true; } - resync_async->loglen--; - } + + resync_async->loglen = 0; + resync_async->rcd_delta = 0; if (req_seq == *seq && atomic64_try_cmpxchg(&resync_async->req, @@ -741,6 +756,7 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) u32 sock_data, is_req_pending; struct tls_prot_info *prot; s64 resync_req; + u16 rcd_delta; u32 req_seq; if (tls_ctx->rx_conf != TLS_HW) @@ -786,8 +802,9 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) return; if (!tls_device_rx_resync_async(rx_ctx->resync_async, - resync_req, &seq)) + resync_req, &seq, &rcd_delta)) return; + tls_bigint_subtract(rcd_sn, rcd_delta); break; } @@ -1245,6 +1262,8 @@ void tls_device_offload_cleanup_rx(struct sock *sk) if (tls_ctx->tx_conf != TLS_HW) { dev_put(netdev); tls_ctx->netdev = NULL; + } else { + set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags); } out: up_read(&device_offload_lock); @@ -1274,7 +1293,8 @@ static int tls_device_down(struct net_device *netdev) if (ctx->tx_conf == TLS_HW) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); - if (ctx->rx_conf == TLS_HW) + if (ctx->rx_conf == TLS_HW && + !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags)) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_RX); WRITE_ONCE(ctx->netdev, NULL); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 95ab5545a931..845c628ac1b2 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1295,6 +1295,12 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, return NULL; } + if (!skb_queue_empty(&sk->sk_receive_queue)) { + __strp_unpause(&ctx->strp); + if (ctx->recv_pkt) + return ctx->recv_pkt; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) return NULL; @@ -1913,7 +1919,7 @@ pick_next_record: * another message type */ msg->msg_flags |= MSG_EOR; - if (ctx->control != TLS_RECORD_TYPE_DATA) + if (control != TLS_RECORD_TYPE_DATA) goto recv_end; } else { break; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b4d7b8aba003..d10916ab4526 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -438,7 +438,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) case SOCK_STREAM: if (vsock_use_local_transport(remote_cid)) new_transport = transport_local; - else if (remote_cid <= VMADDR_CID_HOST) + else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g) new_transport = transport_g2h; else new_transport = transport_h2g; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 0edda1edf988..5956939eebb7 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -841,8 +841,10 @@ void virtio_transport_release(struct vsock_sock *vsk) virtio_transport_free_pkt(pkt); } - if (remove_sock) + if (remove_sock) { + sock_set_flag(sk, SOCK_DONE); vsock_remove_sock(vsk); + } } EXPORT_SYMBOL_GPL(virtio_transport_release); @@ -1132,8 +1134,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, lock_sock(sk); - /* Check if sk has been released before lock_sock */ - if (sk->sk_shutdown == SHUTDOWN_MASK) { + /* Check if sk has been closed before lock_sock */ + if (sock_flag(sk, SOCK_DONE)) { (void)virtio_transport_reset_no_sock(t, pkt); release_sock(sk); sock_put(sk); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a77174b99b07..f67ddf2cebcb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12634,7 +12634,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct nlattr *tb[NUM_NL80211_REKEY_DATA]; - struct cfg80211_gtk_rekey_data rekey_data; + struct cfg80211_gtk_rekey_data rekey_data = {}; int err; if (!info->attrs[NL80211_ATTR_REKEY_DATA]) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 046d3fee66a9..e65a50192432 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -681,7 +681,8 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) int len, i, rc = 0; if (addr_len != sizeof(struct sockaddr_x25) || - addr->sx25_family != AF_X25) { + addr->sx25_family != AF_X25 || + strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) { rc = -EINVAL; goto out; } @@ -775,7 +776,8 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, rc = -EINVAL; if (addr_len != sizeof(struct sockaddr_x25) || - addr->sx25_family != AF_X25) + addr->sx25_family != AF_X25 || + strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) goto out; rc = -ENETUNREACH; @@ -1050,6 +1052,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, makex25->lci = lci; makex25->dest_addr = dest_addr; makex25->source_addr = source_addr; + x25_neigh_hold(nb); makex25->neighbour = nb; makex25->facilities = facilities; makex25->dte_facilities= dte_facilities; diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 56d052bc65cb..56a28a686988 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -66,18 +66,31 @@ static void xdp_umem_release(struct xdp_umem *umem) kfree(umem); } +static void xdp_umem_release_deferred(struct work_struct *work) +{ + struct xdp_umem *umem = container_of(work, struct xdp_umem, work); + + xdp_umem_release(umem); +} + void xdp_get_umem(struct xdp_umem *umem) { refcount_inc(&umem->users); } -void xdp_put_umem(struct xdp_umem *umem) +void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) { if (!umem) return; - if (refcount_dec_and_test(&umem->users)) - xdp_umem_release(umem); + if (refcount_dec_and_test(&umem->users)) { + if (defer_cleanup) { + INIT_WORK(&umem->work, xdp_umem_release_deferred); + schedule_work(&umem->work); + } else { + xdp_umem_release(umem); + } + } } static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index 181fdda2f2a8..aa9fe2780410 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -9,7 +9,7 @@ #include <net/xdp_sock_drv.h> void xdp_get_umem(struct xdp_umem *umem); -void xdp_put_umem(struct xdp_umem *umem); +void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup); struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr); #endif /* XDP_UMEM_H_ */ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index cfbec3989a76..62504471fd20 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -211,6 +211,14 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len, return 0; } +static bool xsk_tx_writeable(struct xdp_sock *xs) +{ + if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2) + return false; + + return true; +} + static bool xsk_is_bound(struct xdp_sock *xs) { if (READ_ONCE(xs->state) == XSK_BOUND) { @@ -296,7 +304,8 @@ void xsk_tx_release(struct xsk_buff_pool *pool) rcu_read_lock(); list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { __xskq_cons_release(xs->tx); - xs->sk.sk_write_space(&xs->sk); + if (xsk_tx_writeable(xs)) + xs->sk.sk_write_space(&xs->sk); } rcu_read_unlock(); } @@ -411,11 +420,7 @@ static int xsk_generic_xmit(struct sock *sk) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; - /* Hinder dev_direct_xmit from freeing the packet and - * therefore completing it in the destructor - */ - refcount_inc(&skb->users); - err = dev_direct_xmit(skb, xs->queue_id); + err = __dev_direct_xmit(skb, xs->queue_id); if (err == NETDEV_TX_BUSY) { /* Tell user-space to retry the send */ skb->destructor = sock_wfree; @@ -429,12 +434,10 @@ static int xsk_generic_xmit(struct sock *sk) /* Ignore NET_XMIT_CN as packet might have been sent */ if (err == NET_XMIT_DROP) { /* SKB completed but not sent */ - kfree_skb(skb); err = -EBUSY; goto out; } - consume_skb(skb); sent_frame = true; } @@ -442,7 +445,8 @@ static int xsk_generic_xmit(struct sock *sk) out: if (sent_frame) - sk->sk_write_space(sk); + if (xsk_tx_writeable(xs)) + sk->sk_write_space(sk); mutex_unlock(&xs->mutex); return err; @@ -477,11 +481,13 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) static __poll_t xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { - __poll_t mask = datagram_poll(file, sock, wait); + __poll_t mask = 0; struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); struct xsk_buff_pool *pool; + sock_poll_wait(file, sock, wait); + if (unlikely(!xsk_is_bound(xs))) return mask; @@ -497,7 +503,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, if (xs->rx && !xskq_prod_is_empty(xs->rx)) mask |= EPOLLIN | EPOLLRDNORM; - if (xs->tx && !xskq_cons_is_full(xs->tx)) + if (xs->tx && xsk_tx_writeable(xs)) mask |= EPOLLOUT | EPOLLWRNORM; return mask; @@ -1147,7 +1153,7 @@ static void xsk_destruct(struct sock *sk) return; if (!xp_put_pool(xs->pool)) - xdp_put_umem(xs->umem); + xdp_put_umem(xs->umem, !xs->pool); sk_refcnt_debug_dec(sk); } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 8a3bf4e1318e..d5adeee9d5d9 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -175,6 +175,7 @@ static int __xp_assign_dev(struct xsk_buff_pool *pool, if (!pool->dma_pages) { WARN(1, "Driver did not DMA map zero-copy buffers"); + err = -EINVAL; goto err_unreg_xsk; } pool->umem->zc = true; @@ -185,8 +186,10 @@ err_unreg_xsk: err_unreg_pool: if (!force_zc) err = 0; /* fallback to copy mode */ - if (err) + if (err) { xsk_clear_pool_at_qid(netdev, queue_id); + dev_put(netdev); + } return err; } @@ -242,7 +245,7 @@ static void xp_release_deferred(struct work_struct *work) pool->cq = NULL; } - xdp_put_umem(pool->umem); + xdp_put_umem(pool->umem, false); xp_destroy(pool); } diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index cdb9cf3cd136..9e71b9f27679 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -264,6 +264,12 @@ static inline bool xskq_cons_is_full(struct xsk_queue *q) q->nentries; } +static inline u32 xskq_cons_present_entries(struct xsk_queue *q) +{ + /* No barriers needed since data is not accessed */ + return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer); +} + /* Functions for producers */ static inline bool xskq_prod_is_full(struct xsk_queue *q) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index e28f0c9ecd6a..d8e8a11ca845 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -234,6 +234,7 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_PAD: /* Ignore */ return 0; + case XFRMA_UNSPEC: case XFRMA_ALG_AUTH: case XFRMA_ALG_CRYPT: case XFRMA_ALG_COMP: @@ -387,7 +388,7 @@ static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src, memcpy(nla, src, nla_attr_size(copy_len)); nla->nla_len = nla_attr_size(payload); - *pos += nla_attr_size(payload); + *pos += nla_attr_size(copy_len); nlmsg->nlmsg_len += nla->nla_len; memset(dst + *pos, 0, payload - copy_len); @@ -563,7 +564,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, return NULL; len += NLMSG_HDRLEN; - h64 = kvmalloc(len, GFP_KERNEL | __GFP_ZERO); + h64 = kvmalloc(len, GFP_KERNEL); if (!h64) return ERR_PTR(-ENOMEM); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a77da7aae6fe..2f1517827995 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2382,8 +2382,10 @@ int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen) if (in_compat_syscall()) { struct xfrm_translator *xtr = xfrm_get_translator(); - if (!xtr) + if (!xtr) { + kfree(data); return -EOPNOTSUPP; + } err = xtr->xlate_user_policy_sockptr(&data, optlen); xfrm_put_translator(xtr); |