diff options
Diffstat (limited to 'net')
34 files changed, 232 insertions, 193 deletions
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 9e10802587fc..3abab70d66dd 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel source */ diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 2c5fc7d1e8a7..74d74fc23167 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel header */ diff --git a/net/core/xdp.c b/net/core/xdp.c index 8c92fc553317..b5737e47ec41 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -774,20 +774,34 @@ static int __init xdp_metadata_init(void) } late_initcall(xdp_metadata_init); +void xdp_set_features_flag(struct net_device *dev, xdp_features_t val) +{ + val &= NETDEV_XDP_ACT_MASK; + if (dev->xdp_features == val) + return; + + dev->xdp_features = val; + + if (dev->reg_state == NETREG_REGISTERED) + call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); +} +EXPORT_SYMBOL_GPL(xdp_set_features_flag); + void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) { - dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; - if (support_sg) - dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT_SG; + xdp_features_t val = (dev->xdp_features | NETDEV_XDP_ACT_NDO_XMIT); - call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); + if (support_sg) + val |= NETDEV_XDP_ACT_NDO_XMIT_SG; + xdp_set_features_flag(dev, val); } EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target); void xdp_features_clear_redirect_target(struct net_device *dev) { - dev->xdp_features &= ~(NETDEV_XDP_ACT_NDO_XMIT | - NETDEV_XDP_ACT_NDO_XMIT_SG); - call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); + xdp_features_t val = dev->xdp_features; + + val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG); + xdp_set_features_flag(dev, val); } EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6957971c2db2..cac17183589f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1933,6 +1933,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) int new_master_mtu; int old_master_mtu; int mtu_limit; + int overhead; int cpu_mtu; int err; @@ -1961,9 +1962,10 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) largest_mtu = slave_mtu; } - mtu_limit = min_t(int, master->max_mtu, dev->max_mtu); + overhead = dsa_tag_protocol_overhead(cpu_dp->tag_ops); + mtu_limit = min_t(int, master->max_mtu, dev->max_mtu + overhead); old_master_mtu = master->mtu; - new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops); + new_master_mtu = largest_mtu + overhead; if (new_master_mtu > mtu_limit) return -ERANGE; @@ -1998,8 +2000,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) out_port_failed: if (new_master_mtu != old_master_mtu) - dsa_port_mtu_change(cpu_dp, old_master_mtu - - dsa_tag_protocol_overhead(cpu_dp->tag_ops)); + dsa_port_mtu_change(cpu_dp, old_master_mtu - overhead); out_cpu_failed: if (new_master_mtu != old_master_mtu) dev_set_mtu(master, old_master_mtu); diff --git a/net/dsa/tag.c b/net/dsa/tag.c index b2fba1a003ce..5105a5ff58fa 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -114,7 +114,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, skb = nskb; } - dev_sw_netstats_rx_add(skb->dev, skb->len); + dev_sw_netstats_rx_add(skb->dev, skb->len + ETH_HLEN); if (dsa_skb_defer_rx_timestamp(p, skb)) return 0; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 10239daa5745..cacdafb41200 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -7,6 +7,7 @@ #include <linux/dsa/brcm.h> #include <linux/etherdevice.h> +#include <linux/if_vlan.h> #include <linux/list.h> #include <linux/slab.h> @@ -252,6 +253,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, struct net_device *dev) { + int len = BRCM_LEG_TAG_LEN; int source_port; u8 *brcm_tag; @@ -266,12 +268,16 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; + /* VLAN tag is added by BCM63xx internal switch */ + if (netdev_uses_dsa(skb->dev)) + len += VLAN_HLEN; + /* Remove Broadcom tag and update checksum */ - skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN); + skb_pull_rcsum(skb, len); dsa_default_offload_fwd_mark(skb); - dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN); + dsa_strip_etype_header(skb, len); return skb; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 00db74d96583..b77f1189d19d 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -415,7 +415,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); if (!node_dst) { - if (net_ratelimit()) + if (port->hsr->prot_version != PRP_V1 && net_ratelimit()) netdev_err(skb->dev, "%s: Unknown node\n", __func__); return; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b5736ef16ed2..390f4be7f7be 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -576,6 +576,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, cfg->fc_scope = RT_SCOPE_UNIVERSE; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + if (cmd == SIOCDELRT) return 0; diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 5c14fe030eda..6c37c4f98cca 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel source */ diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h index 58b1e1ed4b3b..dbd0780a5d34 100644 --- a/net/ipv4/fou_nl.h +++ b/net/ipv4/fou_nl.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel header */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e41fdc38ce19..6edae3886885 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -828,8 +828,14 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const #if IS_ENABLED(CONFIG_IPV6) struct in6_addr addr_any = {}; - if (sk->sk_family != tb->family) + if (sk->sk_family != tb->family) { + if (sk->sk_family == AF_INET) + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && + ipv6_addr_equal(&tb->v6_rcv_saddr, &addr_any); + return false; + } if (sk->sk_family == AF_INET6) return net_eq(ib2_net(tb), net) && tb->port == port && diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ffff46cdcb58..e55a20264960 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -552,7 +552,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -561,7 +561,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index de90b09dfe78..2541083d49ad 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -614,10 +614,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > dev->needed_headroom) - dev->needed_headroom = headroom; + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); - if (skb_cow_head(skb, dev->needed_headroom)) { + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ip_rt_put(rt); goto tx_dropped; } @@ -800,10 +800,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; + if (max_headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, dev->needed_headroom)) { + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ip_rt_put(rt); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 71d01cf3c13e..ba839e441450 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3605,7 +3605,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rsk_rcv_wnd, 65535U)); tcp_options_write(th, NULL, &opts); th->doff = (tcp_header_size >> 2); - __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 89f5f0f3f5d6..a4ecfc9d2593 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -959,7 +959,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -968,7 +968,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 47b6607a1370..5e80e517f071 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1240,8 +1240,8 @@ route_lookup: */ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + dst->header_len + t->hlen; - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; + if (max_headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, max_headroom); err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index eb0295d90039..fc3fddeb6f36 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -83,7 +83,7 @@ struct iucv_irq_data { u16 ippathid; u8 ipflags1; u8 iptype; - u32 res2[8]; + u32 res2[9]; }; struct iucv_irq_list { diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 56628b52d100..5c8dea49626c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -997,9 +997,13 @@ out: return ret; } +static struct lock_class_key mptcp_slock_keys[2]; +static struct lock_class_key mptcp_keys[2]; + static int mptcp_pm_nl_create_listen_socket(struct sock *sk, struct mptcp_pm_addr_entry *entry) { + bool is_ipv6 = sk->sk_family == AF_INET6; int addrlen = sizeof(struct sockaddr_in); struct sockaddr_storage addr; struct socket *ssock; @@ -1016,6 +1020,18 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (!newsk) return -EINVAL; + /* The subflow socket lock is acquired in a nested to the msk one + * in several places, even by the TCP stack, and this msk is a kernel + * socket: lockdep complains. Instead of propagating the _nested + * modifiers in several places, re-init the lock class for the msk + * socket to an mptcp specific one. + */ + sock_lock_init_class_and_name(newsk, + is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", + &mptcp_slock_keys[is_ipv6], + is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", + &mptcp_keys[is_ipv6]); + lock_sock(newsk); ssock = __mptcp_nmpc_socket(mptcp_sk(newsk)); release_sock(newsk); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ad9c46202fc..60b23b2716c4 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -825,7 +825,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); - mptcp_propagate_sndbuf((struct sock *)msk, ssk); mptcp_sockopt_sync_locked(msk, ssk); return true; } @@ -2343,7 +2342,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out; } - sock_orphan(ssk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2351,15 +2349,25 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, * reference owned by msk; */ if (!inet_csk(ssk)->icsk_ulp_ops) { + WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD)); kfree_rcu(subflow, rcu); + } else if (msk->in_accept_queue && msk->first == ssk) { + /* if the first subflow moved to a close state, e.g. due to + * incoming reset and we reach here before inet_child_forget() + * the TCP stack could later try to close it via + * inet_csk_listen_stop(), or deliver it to the user space via + * accept(). + * We can't delete the subflow - or risk a double free - nor let + * the msk survive - or will be leaked in the non accept scenario: + * fallback and let TCP cope with the subflow cleanup. + */ + WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD)); + mptcp_subflow_drop_ctx(ssk); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) { - tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); - inet_csk_listen_stop(ssk); + if (ssk->sk_state == TCP_LISTEN) mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); - } + __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -2399,9 +2407,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) return 0; } -static void __mptcp_close_subflow(struct mptcp_sock *msk) +static void __mptcp_close_subflow(struct sock *sk) { struct mptcp_subflow_context *subflow, *tmp; + struct mptcp_sock *msk = mptcp_sk(sk); might_sleep(); @@ -2415,7 +2424,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) if (!skb_queue_empty_lockless(&ssk->sk_receive_queue)) continue; - mptcp_close_ssk((struct sock *)msk, ssk, subflow); + mptcp_close_ssk(sk, ssk, subflow); + } + + /* if the MPC subflow has been closed before the msk is accepted, + * msk will never be accept-ed, close it now + */ + if (!msk->first && msk->in_accept_queue) { + sock_set_flag(sk, SOCK_DEAD); + inet_sk_state_store(sk, TCP_CLOSE); } } @@ -2624,6 +2641,9 @@ static void mptcp_worker(struct work_struct *work) __mptcp_check_send_data_fin(sk); mptcp_check_data_fin(sk); + if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + __mptcp_close_subflow(sk); + /* There is no point in keeping around an orphaned sk timedout or * closed, but we need the msk around to reply to incoming DATA_FIN, * even if it is orphaned and in FIN_WAIT2 state @@ -2639,9 +2659,6 @@ static void mptcp_worker(struct work_struct *work) } } - if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) - __mptcp_close_subflow(msk); - if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); @@ -3079,6 +3096,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; msk->subflow = NULL; + msk->in_accept_queue = 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) WRITE_ONCE(msk->csum_enabled, true); @@ -3096,8 +3114,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, security_inet_csk_clone(nsk, req); bh_unlock_sock(nsk); - /* keep a single reference */ - __sock_put(nsk); + /* note: the newly allocated socket refcount is 2 now */ return nsk; } @@ -3153,8 +3170,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, goto out; } - /* acquire the 2nd reference for the owning socket */ - sock_hold(new_mptcp_sock); newsk = new_mptcp_sock; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); } else { @@ -3705,25 +3720,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct sock *newsk = newsock->sk; set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + msk->in_accept_queue = 0; lock_sock(newsk); - /* PM/worker can now acquire the first subflow socket - * lock without racing with listener queue cleanup, - * we can notify it, if needed. - * - * Even if remote has reset the initial subflow by now - * the refcnt is still at least one. - */ - subflow = mptcp_subflow_ctx(msk->first); - list_add(&subflow->node, &msk->conn_list); - sock_hold(msk->first); - if (mptcp_is_fully_established(newsk)) - mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); - - mptcp_rcv_space_init(msk, msk->first); - mptcp_propagate_sndbuf(newsk, msk->first); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 61fd8eabfca2..339a6f072989 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -295,7 +295,8 @@ struct mptcp_sock { u8 recvmsg_inq:1, cork:1, nodelay:1, - fastopening:1; + fastopening:1, + in_accept_queue:1; int connect_flags; struct work_struct work; struct sk_buff *ooo_last_skb; @@ -628,7 +629,6 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); -void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); @@ -666,6 +666,8 @@ void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow); bool mptcp_subflow_active(struct mptcp_subflow_context *subflow); +void mptcp_subflow_drop_ctx(struct sock *ssk); + static inline void mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) { diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4ae1a7304cf0..a0041360ee9d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -397,10 +397,15 @@ void mptcp_subflow_reset(struct sock *ssk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; + /* mptcp_mp_fail_no_response() can reach here on an already closed + * socket + */ + if (ssk->sk_state == TCP_CLOSE) + return; + /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); - tcp_set_state(ssk, TCP_CLOSE); tcp_send_active_reset(ssk, GFP_ATOMIC); tcp_done(ssk); if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) && @@ -622,7 +627,7 @@ static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init; -static struct proto tcpv6_prot_override; +static struct proto tcpv6_prot_override __ro_after_init; static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { @@ -693,9 +698,10 @@ static bool subflow_hmac_valid(const struct request_sock *req, static void mptcp_force_close(struct sock *sk) { - /* the msk is not yet exposed to user-space */ + /* the msk is not yet exposed to user-space, and refcount is 2 */ inet_sk_state_store(sk, TCP_CLOSE); sk_common_release(sk); + sock_put(sk); } static void subflow_ulp_fallback(struct sock *sk, @@ -711,7 +717,7 @@ static void subflow_ulp_fallback(struct sock *sk, mptcp_subflow_ops_undo_override(sk); } -static void subflow_drop_ctx(struct sock *ssk) +void mptcp_subflow_drop_ctx(struct sock *ssk) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk); @@ -750,6 +756,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; struct sock *new_msk = NULL; + struct mptcp_sock *owner; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -816,7 +823,7 @@ create_child: if (new_msk) mptcp_copy_inaddrs(new_msk, child); - subflow_drop_ctx(child); + mptcp_subflow_drop_ctx(child); goto out; } @@ -824,6 +831,8 @@ create_child: ctx->setsockopt_seq = listener->setsockopt_seq; if (ctx->mp_capable) { + owner = mptcp_sk(new_msk); + /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ @@ -832,14 +841,14 @@ create_child: /* record the newly created socket as the first msk * subflow, but don't link it yet into conn_list */ - WRITE_ONCE(mptcp_sk(new_msk)->first, child); + WRITE_ONCE(owner->first, child); /* new mpc subflow takes ownership of the newly * created mptcp socket */ mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; - mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); - mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); + mptcp_pm_new_connection(owner, child, 1); + mptcp_token_accept(subflow_req, owner); ctx->conn = new_msk; new_msk = NULL; @@ -847,15 +856,21 @@ create_child: * uses the correct data */ mptcp_copy_inaddrs(ctx->conn, child); + mptcp_propagate_sndbuf(ctx->conn, child); + + mptcp_rcv_space_init(owner, child); + list_add(&ctx->node, &owner->conn_list); + sock_hold(child); /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) + if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); + mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + ctx->pm_notified = 1; + } } else if (ctx->mp_join) { - struct mptcp_sock *owner; - owner = subflow_req->msk; if (!owner) { subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); @@ -899,7 +914,7 @@ out: return child; dispose_child: - subflow_drop_ctx(child); + mptcp_subflow_drop_ctx(child); tcp_rsk(req)->drop_req = true; inet_csk_prepare_for_destroy_sock(child); tcp_done(child); @@ -910,7 +925,7 @@ dispose_child: } static struct inet_connection_sock_af_ops subflow_specific __ro_after_init; -static struct proto tcp_prot_override; +static struct proto tcp_prot_override __ro_after_init; enum mapping_status { MAPPING_OK, @@ -1432,6 +1447,13 @@ static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + /* bail early if this is a no-op, so that we avoid introducing a + * problematic lockdep dependency between TCP accept queue lock + * and msk socket spinlock + */ + if (!sk->sk_socket) + return; + mptcp_data_lock(sk); if (!sock_owned_by_user(sk)) __mptcp_error_report(sk); @@ -1803,79 +1825,6 @@ static void subflow_state_change(struct sock *sk) } } -void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) -{ - struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; - struct mptcp_sock *msk, *next, *head = NULL; - struct request_sock *req; - - /* build a list of all unaccepted mptcp sockets */ - spin_lock_bh(&queue->rskq_lock); - for (req = queue->rskq_accept_head; req; req = req->dl_next) { - struct mptcp_subflow_context *subflow; - struct sock *ssk = req->sk; - struct mptcp_sock *msk; - - if (!sk_is_mptcp(ssk)) - continue; - - subflow = mptcp_subflow_ctx(ssk); - if (!subflow || !subflow->conn) - continue; - - /* skip if already in list */ - msk = mptcp_sk(subflow->conn); - if (msk->dl_next || msk == head) - continue; - - msk->dl_next = head; - head = msk; - } - spin_unlock_bh(&queue->rskq_lock); - if (!head) - return; - - /* can't acquire the msk socket lock under the subflow one, - * or will cause ABBA deadlock - */ - release_sock(listener_ssk); - - for (msk = head; msk; msk = next) { - struct sock *sk = (struct sock *)msk; - bool do_cancel_work; - - sock_hold(sk); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - next = msk->dl_next; - msk->first = NULL; - msk->dl_next = NULL; - - do_cancel_work = __mptcp_close(sk, 0); - release_sock(sk); - if (do_cancel_work) { - /* lockdep will report a false positive ABBA deadlock - * between cancel_work_sync and the listener socket. - * The involved locks belong to different sockets WRT - * the existing AB chain. - * Using a per socket key is problematic as key - * deregistration requires process context and must be - * performed at socket disposal time, in atomic - * context. - * Just tell lockdep to consider the listener socket - * released here. - */ - mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); - mptcp_cancel_work(sk); - mutex_acquire(&listener_sk->sk_lock.dep_map, - SINGLE_DEPTH_NESTING, 0, _RET_IP_); - } - sock_put(sk); - } - - /* we are still under the listener msk socket lock */ - lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); -} - static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -1932,6 +1881,13 @@ static void subflow_ulp_release(struct sock *ssk) * when the subflow is still unaccepted */ release = ctx->disposable || list_empty(&ctx->node); + + /* inet_child_forget() does not call sk_state_change(), + * explicitly trigger the socket close machinery + */ + if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, + &mptcp_sk(sk)->flags)) + mptcp_schedule_work(sk); sock_put(sk); } diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 80713febfac6..d9da942ad53d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1803,8 +1803,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; - if (np && (of_get_property(np, "mellanox,multi-host", NULL) || - of_get_property(np, "mlx,multi-host", NULL))) + if (np && (of_property_read_bool(np, "mellanox,multi-host") || + of_property_read_bool(np, "mlx,multi-host"))) ndp->mlx_multi_host = true; } diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index e55e455275c4..9544c2f16998 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { - u32 plen = sizeof_field(struct nf_nat_range, min_addr.all); + u32 plen = sizeof_field(struct nf_nat_range, min_proto.all); struct nft_masq *priv = nft_expr_priv(expr); int err; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 047999150390..5c29915ab028 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -226,7 +226,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->flags |= NF_NAT_RANGE_MAP_IPS; } - plen = sizeof_field(struct nf_nat_range, min_addr.all); + plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], &priv->sreg_proto_min, plen); diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 5f7739987559..67cec56bc84a 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx, unsigned int plen; int err; - plen = sizeof_field(struct nf_nat_range, min_addr.all); + plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], &priv->sreg_proto_min, plen); @@ -236,7 +236,7 @@ static struct nft_expr_type nft_redir_inet_type __read_mostly = { .name = "redir", .ops = &nft_redir_inet_ops, .policy = nft_redir_policy, - .maxattr = NFTA_MASQ_MAX, + .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 34c508675041..296fc1afedd8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1589,6 +1589,10 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], t->tca__pad1 = 0; t->tca__pad2 = 0; + if (extack && extack->_msg && + nla_put_string(skb, TCA_ROOT_EXT_WARN_MSG, extack->_msg)) + goto out_nlmsg_trim; + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) goto out_nlmsg_trim; @@ -1596,10 +1600,6 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], if (tcf_action_dump(skb, actions, bind, ref, false) < 0) goto out_nlmsg_trim; - if (extack && extack->_msg && - nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) - goto out_nlmsg_trim; - nla_nest_end(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ff6dd86bdc9f..c6b4a62276f6 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3501,6 +3501,7 @@ out_pnet: out_nl: smc_nl_exit(); out_ism: + smc_clc_exit(); smc_ism_exit(); out_pernet_subsys_stat: unregister_pernet_subsys(&smc_net_stat_ops); diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 53f63bfbaf5f..89105e95b452 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -114,6 +114,9 @@ int smc_cdc_msg_send(struct smc_connection *conn, union smc_host_cursor cfed; int rc; + if (unlikely(!READ_ONCE(conn->sndbuf_desc))) + return -ENOBUFS; + smc_cdc_add_pending_send(conn, pend); conn->tx_cdc_seq++; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d52060b2680c..454356771cda 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1464,7 +1464,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) if (lgr->terminating) return; /* lgr already terminating */ /* cancel free_work sync, will terminate when lgr->freeing is set */ - cancel_delayed_work_sync(&lgr->free_work); + cancel_delayed_work(&lgr->free_work); lgr->terminating = 1; /* kill remaining link group connections */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1fd3f5e57285..fea7ce8fba14 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -798,6 +798,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) static int svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { + struct svc_rqst *rqstp; struct task_struct *task; unsigned int state = serv->sv_nrthreads-1; @@ -806,7 +807,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) task = choose_victim(serv, pool, &state); if (task == NULL) break; - kthread_stop(task); + rqstp = kthread_data(task); + /* Did we lose a race to svo_function threadfn? */ + if (kthread_stop(task) == -EINTR) + svc_exit_thread(rqstp); nrservs++; } while (nrservs < 0); return 0; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a1581c77cf84..6564192e7f20 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -241,21 +241,18 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, } static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, - struct sk_buff *skb) + u32 len) { - if (vvs->rx_bytes + skb->len > vvs->buf_alloc) + if (vvs->rx_bytes + len > vvs->buf_alloc) return false; - vvs->rx_bytes += skb->len; + vvs->rx_bytes += len; return true; } static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, - struct sk_buff *skb) + u32 len) { - int len; - - len = skb_headroom(skb) - sizeof(struct virtio_vsock_hdr) - skb->len; vvs->rx_bytes -= len; vvs->fwd_cnt += len; } @@ -367,7 +364,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, spin_lock_bh(&vvs->rx_lock); while (total < len && !skb_queue_empty(&vvs->rx_queue)) { - skb = __skb_dequeue(&vvs->rx_queue); + skb = skb_peek(&vvs->rx_queue); bytes = len - total; if (bytes > skb->len) @@ -388,10 +385,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, skb_pull(skb, bytes); if (skb->len == 0) { - virtio_transport_dec_rx_pkt(vvs, skb); + u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); + + virtio_transport_dec_rx_pkt(vvs, pkt_len); + __skb_unlink(skb, &vvs->rx_queue); consume_skb(skb); - } else { - __skb_queue_head(&vvs->rx_queue, skb); } } @@ -437,17 +435,17 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, while (!msg_ready) { struct virtio_vsock_hdr *hdr; + size_t pkt_len; skb = __skb_dequeue(&vvs->rx_queue); if (!skb) break; hdr = virtio_vsock_hdr(skb); + pkt_len = (size_t)le32_to_cpu(hdr->len); if (dequeued_len >= 0) { - size_t pkt_len; size_t bytes_to_copy; - pkt_len = (size_t)le32_to_cpu(hdr->len); bytes_to_copy = min(user_buf_len, pkt_len); if (bytes_to_copy) { @@ -466,7 +464,6 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, dequeued_len = err; } else { user_buf_len -= bytes_to_copy; - skb_pull(skb, bytes_to_copy); } spin_lock_bh(&vvs->rx_lock); @@ -484,7 +481,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, msg->msg_flags |= MSG_EOR; } - virtio_transport_dec_rx_pkt(vvs, skb); + virtio_transport_dec_rx_pkt(vvs, pkt_len); kfree_skb(skb); } @@ -1040,7 +1037,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, spin_lock_bh(&vvs->rx_lock); - can_enqueue = virtio_transport_inc_rx_pkt(vvs, skb); + can_enqueue = virtio_transport_inc_rx_pkt(vvs, len); if (!can_enqueue) { free_pkt = true; goto out; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 28ce13840a88..7bdeb8eea92d 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1500,8 +1500,6 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, connect->key = NULL; connect->key_len = 0; connect->key_idx = 0; - connect->crypto.cipher_group = 0; - connect->crypto.n_ciphers_pairwise = 0; } wdev->connect_keys = connkeys; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2ab3e09e2227..50baf50dc513 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2815,11 +2815,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, goto error; } - if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { - NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate an AF_UNSPEC selector"); - goto error; - } - x->inner_mode = *inner_mode; if (x->props.family == AF_INET) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cf5172d4ce68..103af2b3e986 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1012,7 +1012,9 @@ static int copy_to_user_aead(struct xfrm_algo_aead *aead, struct sk_buff *skb) return -EMSGSIZE; ap = nla_data(nla); - memcpy(ap, aead, sizeof(*aead)); + strscpy_pad(ap->alg_name, aead->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = aead->alg_key_len; + ap->alg_icv_len = aead->alg_icv_len; if (redact_secret && aead->alg_key_len) memset(ap->alg_key, 0, (aead->alg_key_len + 7) / 8); @@ -1032,7 +1034,8 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb) return -EMSGSIZE; ap = nla_data(nla); - memcpy(ap, ealg, sizeof(*ealg)); + strscpy_pad(ap->alg_name, ealg->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = ealg->alg_key_len; if (redact_secret && ealg->alg_key_len) memset(ap->alg_key, 0, (ealg->alg_key_len + 7) / 8); @@ -1043,6 +1046,40 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb) return 0; } +static int copy_to_user_calg(struct xfrm_algo *calg, struct sk_buff *skb) +{ + struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_COMP, sizeof(*calg)); + struct xfrm_algo *ap; + + if (!nla) + return -EMSGSIZE; + + ap = nla_data(nla); + strscpy_pad(ap->alg_name, calg->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = 0; + + return 0; +} + +static int copy_to_user_encap(struct xfrm_encap_tmpl *ep, struct sk_buff *skb) +{ + struct nlattr *nla = nla_reserve(skb, XFRMA_ENCAP, sizeof(*ep)); + struct xfrm_encap_tmpl *uep; + + if (!nla) + return -EMSGSIZE; + + uep = nla_data(nla); + memset(uep, 0, sizeof(*uep)); + + uep->encap_type = ep->encap_type; + uep->encap_sport = ep->encap_sport; + uep->encap_dport = ep->encap_dport; + uep->encap_oa = ep->encap_oa; + + return 0; +} + static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m) { int ret = 0; @@ -1098,12 +1135,12 @@ static int copy_to_user_state_extra(struct xfrm_state *x, goto out; } if (x->calg) { - ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + ret = copy_to_user_calg(x->calg, skb); if (ret) goto out; } if (x->encap) { - ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + ret = copy_to_user_encap(x->encap, skb); if (ret) goto out; } |