From d67c5649c1541dc93f202eeffc6f49220a4ed71d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:53 +0200 Subject: mptcp: fully established after ADD_ADDR echo on MPJ Before this patch, receiving an ADD_ADDR echo on the just connected MP_JOIN subflow -- initiator side, after the MP_JOIN 3WHS -- was resulting in an MP_RESET. That's because only ACKs with a DSS or ADD_ADDRs without the echo bit were allowed. Not allowing the ADD_ADDR echo after an MP_CAPABLE 3WHS makes sense, as we are not supposed to send an ADD_ADDR before because it requires to be in full established mode first. For the MP_JOIN 3WHS, that's different: the ADD_ADDR can be sent on a previous subflow, and the ADD_ADDR echo can be received on the recently created one. The other peer will already be in fully established, so it is allowed to send that. We can then relax the conditions here to accept the ADD_ADDR echo for MPJ subflows. Fixes: 67b12f792d5e ("mptcp: full fully established support after ADD_ADDR") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-1-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8a68382a4fe9..ac2f1a54cc43 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -958,7 +958,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->remote_key_valid && (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || - ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) { + ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && + (!mp_opt->echo || subflow->mp_join)))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ -- cgit v1.2.3 From 8af1f11865f259c882cce71d32f85ee9004e2660 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:54 +0200 Subject: mptcp: pm: deny endp with signal + subflow + port As mentioned in the 'Fixes' commit, the port flag is only supported by the 'signal' flag, and not by the 'subflow' one. Then if both the 'signal' and 'subflow' flags are set, the problem is the same: the feature cannot work with the 'subflow' flag. Technically, if both the 'signal' and 'subflow' flags are set, it will be possible to create the listening socket, but not to establish a subflow using this source port. So better to explicitly deny it, not to create some confusions because the expected behaviour is not possible. Fixes: 09f12c3ab7a5 ("mptcp: allow to use port and non-signal in set_flags") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-2-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 37954a0b087d..c921d07e5940 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1328,8 +1328,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; - if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - GENL_SET_ERR_MSG(info, "flags must have signal when using port"); + if (addr.addr.port && !address_use_port(&addr)) { + GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); return -EINVAL; } -- cgit v1.2.3 From c95eb32ced823a00be62202b43966b07b2f20b7f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:55 +0200 Subject: mptcp: pm: reduce indentation blocks That will simplify the following commits. No functional changes intended. Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-3-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c921d07e5940..780f4cca165c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -567,16 +567,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; - if (local) { - if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); - msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false); - mptcp_pm_nl_addr_send_ack(msk); - } - } + if (!local) + goto subflow; + + if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) + goto subflow; + + __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled++; + mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_nl_addr_send_ack(msk); } +subflow: /* check if should create a new subflow */ while (msk->pm.local_addr_used < local_addr_max && msk->pm.subflows < subflows_max) { -- cgit v1.2.3 From cd7c957f936f8cb80d03e5152f4013aae65bd986 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:56 +0200 Subject: mptcp: pm: don't try to create sf if alloc failed It sounds better to avoid wasting cycles and / or put extreme memory pressure on the system by trying to create new subflows if it was not possible to add a new item in the announce list. While at it, a warning is now printed if the entry was already in the list as it should not happen with the in-kernel path-manager. With this PM, mptcp_pm_alloc_anno_list() should only fail in case of memory pressure. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-4-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 780f4cca165c..2be7af377cda 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -348,7 +348,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (add_entry) { - if (mptcp_pm_is_kernel(msk)) + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) return false; sk_reset_timer(sk, &add_entry->add_timer, @@ -555,8 +555,6 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < add_addr_signal_max) { - local = select_signal_address(pernet, msk); - /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -567,11 +565,15 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; + local = select_signal_address(pernet, msk); if (!local) goto subflow; + /* If the alloc fails, we are on memory pressure, not worth + * continuing, and trying to create subflows. + */ if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) - goto subflow; + return; __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; -- cgit v1.2.3 From 85df533a787bf07bf4367ce2a02b822ff1fba1a3 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:57 +0200 Subject: mptcp: pm: do not ignore 'subflow' if 'signal' flag is also set Up to the 'Fixes' commit, having an endpoint with both the 'signal' and 'subflow' flags, resulted in the creation of a subflow and an address announcement using the address linked to this endpoint. After this commit, only the address announcement was done, ignoring the 'subflow' flag. That's because the same bitmap is used for the two flags. It is OK to keep this single bitmap, the already selected local endpoint simply have to be re-used, but not via select_local_address() not to look at the just modified bitmap. Note that it is unusual to set the two flags together: creating a new subflow using a new local address will implicitly advertise it to the other peer. So in theory, no need to advertise it explicitly as well. Maybe there are use-cases -- the subflow might not reach the other peer that way, we can ask the other peer to try initiating the new subflow without delay -- or very likely the user is confused, and put both flags "just to be sure at least the right one is set". Still, if it is allowed, the kernel should do what has been asked: using this endpoint to announce the address and to create a new subflow from it. An alternative is to forbid the use of the two flags together, but that's probably too late, there are maybe use-cases, and it was working before. This patch will avoid people complaining subflows are not created using the endpoint they added with the 'subflow' and 'signal' flag. Note that with the current patch, the subflow might not be created in some corner cases, e.g. if the 'subflows' limit was reached when sending the ADD_ADDR, but changed later on. It is probably not worth splitting id_avail_bitmap per target ('signal', 'subflow'), which will add another large field to the msk "just" to track (again) endpoints. Anyway, currently when the limits are changed, the kernel doesn't check if new subflows can be created or removed, because we would need to keep track of the received ADD_ADDR, and more. It sounds OK to assume that the limits should be properly configured before establishing new connections. Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-5-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 2be7af377cda..4cae2aa7be5c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -512,8 +512,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { + struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL; struct sock *sk = (struct sock *)msk; - struct mptcp_pm_addr_entry *local; unsigned int add_addr_signal_max; unsigned int local_addr_max; struct pm_nl_pernet *pernet; @@ -579,6 +579,9 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &local->addr, false); mptcp_pm_nl_addr_send_ack(msk); + + if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = local; } subflow: @@ -589,9 +592,14 @@ subflow: bool fullmesh; int i, nr; - local = select_local_address(pernet, msk); - if (!local) - break; + if (signal_and_subflow) { + local = signal_and_subflow; + signal_and_subflow = NULL; + } else { + local = select_local_address(pernet, msk); + if (!local) + break; + } fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); -- cgit v1.2.3 From bec1f3b119ebc613d08dfbcdbaef01a79aa7de92 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:58 +0200 Subject: selftests: mptcp: join: ability to invert ADD_ADDR check In the following commit, the client will initiate the ADD_ADDR, instead of the server. We need to way to verify the ADD_ADDR have been correctly sent. Note: the default expected counters for when the port number is given are never changed by the caller, no need to accept them as parameter then. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk") Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-6-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 40 ++++++++++++++++--------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 4df48f1f14ab..52a25ac43d10 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1415,18 +1415,28 @@ chk_add_nr() local add_nr=$1 local echo_nr=$2 local port_nr=${3:-0} - local syn_nr=${4:-$port_nr} - local syn_ack_nr=${5:-$port_nr} - local ack_nr=${6:-$port_nr} - local mis_syn_nr=${7:-0} - local mis_ack_nr=${8:-0} + local ns_invert=${4:-""} + local syn_nr=$port_nr + local syn_ack_nr=$port_nr + local ack_nr=$port_nr + local mis_syn_nr=0 + local mis_ack_nr=0 + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" local count local timeout - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg="invert" + fi + + timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) print_check "add" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1438,7 +1448,7 @@ chk_add_nr() fi print_check "echo" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1449,7 +1459,7 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then print_check "pt" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1459,7 +1469,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1470,7 +1480,7 @@ chk_add_nr() fi print_check "synack" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1481,7 +1491,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1492,7 +1502,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1503,7 +1513,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1513,6 +1523,8 @@ chk_add_nr() print_ok fi fi + + print_info "$extra_msg" } chk_add_tx_nr() -- cgit v1.2.3 From 4d2868b5d191c74262f7407972d68d1bf3245d6a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jul 2024 13:05:59 +0200 Subject: selftests: mptcp: join: test both signal & subflow It should be quite uncommon to set both the subflow and the signal flags: the initiator of the connection is typically the one creating new subflows, not the other peer, then no need to announce additional local addresses, and use it to create subflows. But some people might be confused about the flags, and set both "just to be sure at least the right one is set". To verify the previous fix, and avoid future regressions, this specific case is now validated: the client announces a new address, and initiates a new subflow from the same address. While working on this, another bug has been noticed, where the client reset the new subflow because an ADD_ADDR echo got received as the 3rd ACK: this new test also explicitly checks that no RST have been sent by the client and server. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk") Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-7-c8a9b036493b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 52a25ac43d10..9ea6d698e9d3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1989,6 +1989,21 @@ signal_address_tests() chk_add_nr 1 1 fi + # uncommon: subflow and signal flags on the same endpoint + # or because the user wrongly picked both, but still expects the client + # to create additional subflows + if reset "subflow and signal together"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 0 invert # only initiated by ns2 + chk_add_nr 0 0 0 # none initiated by ns1 + chk_rst_nr 0 0 invert # no RST sent by the client + chk_rst_nr 0 0 # no RST sent by the server + fi + # accept and use add_addr with additional subflows if reset "multiple subflows and signal"; then pm_nl_set_limits $ns1 0 3 -- cgit v1.2.3 From eeef5f183f1c1bdc3ea42b26ded1da2a8a5c69d9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 1 Aug 2024 09:28:42 -0700 Subject: MAINTAINERS: update status of sky2 and skge drivers The old SysKonnect NIc's are not used or actively maintained anymore. My sky2 NIC's are all in box in back corner of attic. Signed-off-by: Stephen Hemminger Link: https://patch.msgid.link/20240801162930.212299-1-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8766f3e5e87e..714e113d6eed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13539,7 +13539,7 @@ MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) M: Mirko Lindner M: Stephen Hemminger L: netdev@vger.kernel.org -S: Maintained +S: Odd fixes F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER -- cgit v1.2.3 From 9ab0faa7f9ffe31296dbb9bbe6f76c72c14eea18 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 31 Jul 2024 16:46:24 -0700 Subject: sctp: Fix null-ptr-deref in reuseport_add_sock(). syzbot reported a null-ptr-deref while accessing sk2->sk_reuseport_cb in reuseport_add_sock(). [0] The repro first creates a listener with SO_REUSEPORT. Then, it creates another listener on the same port and concurrently closes the first listener. The second listen() calls reuseport_add_sock() with the first listener as sk2, where sk2->sk_reuseport_cb is not expected to be cleared concurrently, but the close() does clear it by reuseport_detach_sock(). The problem is SCTP does not properly synchronise reuseport_alloc(), reuseport_add_sock(), and reuseport_detach_sock(). The caller of reuseport_alloc() and reuseport_{add,detach}_sock() must provide synchronisation for sockets that are classified into the same reuseport group. Otherwise, such sockets form multiple identical reuseport groups, and all groups except one would be silently dead. 1. Two sockets call listen() concurrently 2. No socket in the same group found in sctp_ep_hashtable[] 3. Two sockets call reuseport_alloc() and form two reuseport groups 4. Only one group hit first in __sctp_rcv_lookup_endpoint() receives incoming packets Also, the reported null-ptr-deref could occur. TCP/UDP guarantees that would not happen by holding the hash bucket lock. Let's apply the locking strategy to __sctp_hash_endpoint() and __sctp_unhash_endpoint(). [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 UID: 0 PID: 10230 Comm: syz-executor119 Not tainted 6.10.0-syzkaller-12585-g301927d2d2eb #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/27/2024 RIP: 0010:reuseport_add_sock+0x27e/0x5e0 net/core/sock_reuseport.c:350 Code: 00 0f b7 5d 00 bf 01 00 00 00 89 de e8 1b a4 ff f7 83 fb 01 0f 85 a3 01 00 00 e8 6d a0 ff f7 49 8d 7e 12 48 89 f8 48 c1 e8 03 <42> 0f b6 04 28 84 c0 0f 85 4b 02 00 00 41 0f b7 5e 12 49 8d 7e 14 RSP: 0018:ffffc9000b947c98 EFLAGS: 00010202 RAX: 0000000000000002 RBX: ffff8880252ddf98 RCX: ffff888079478000 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000012 RBP: 0000000000000001 R08: ffffffff8993e18d R09: 1ffffffff1fef385 R10: dffffc0000000000 R11: fffffbfff1fef386 R12: ffff8880252ddac0 R13: dffffc0000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f24e45b96c0(0000) GS:ffff8880b9300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffcced5f7b8 CR3: 00000000241be000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __sctp_hash_endpoint net/sctp/input.c:762 [inline] sctp_hash_endpoint+0x52a/0x600 net/sctp/input.c:790 sctp_listen_start net/sctp/socket.c:8570 [inline] sctp_inet_listen+0x767/0xa20 net/sctp/socket.c:8625 __sys_listen_socket net/socket.c:1883 [inline] __sys_listen+0x1b7/0x230 net/socket.c:1894 __do_sys_listen net/socket.c:1902 [inline] __se_sys_listen net/socket.c:1900 [inline] __x64_sys_listen+0x5a/0x70 net/socket.c:1900 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f24e46039b9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 91 1a 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f24e45b9228 EFLAGS: 00000246 ORIG_RAX: 0000000000000032 RAX: ffffffffffffffda RBX: 00007f24e468e428 RCX: 00007f24e46039b9 RDX: 00007f24e46039b9 RSI: 0000000000000003 RDI: 0000000000000004 RBP: 00007f24e468e420 R08: 00007f24e45b96c0 R09: 00007f24e45b96c0 R10: 00007f24e45b96c0 R11: 0000000000000246 R12: 00007f24e468e42c R13: 00007f24e465a5dc R14: 0020000000000001 R15: 00007ffcced5f7d8 Modules linked in: Fixes: 6ba845740267 ("sctp: process sk_reuseport in sctp_get_port_local") Reported-by: syzbot+e6979a5d2f10ecb700e4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e6979a5d2f10ecb700e4 Tested-by: syzbot+e6979a5d2f10ecb700e4@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20240731234624.94055-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/sctp/input.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 17fcaa9b0df9..a8a254a5008e 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -735,15 +735,19 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); struct sctp_hashbucket *head; + int err = 0; ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port); head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (sk->sk_reuseport) { bool any = sctp_is_ep_boundall(sk); struct sctp_endpoint *ep2; struct list_head *list; - int cnt = 0, err = 1; + int cnt = 0; + + err = 1; list_for_each(list, &ep->base.bind_addr.address_list) cnt++; @@ -761,24 +765,24 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) if (!err) { err = reuseport_add_sock(sk, sk2, any); if (err) - return err; + goto out; break; } else if (err < 0) { - return err; + goto out; } } if (err) { err = reuseport_alloc(sk, any); if (err) - return err; + goto out; } } - write_lock(&head->lock); hlist_add_head(&ep->node, &head->chain); +out: write_unlock(&head->lock); - return 0; + return err; } /* Add an endpoint to the hash. Local BH-safe. */ @@ -803,10 +807,9 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - - write_lock(&head->lock); hlist_del_init(&ep->node); write_unlock(&head->lock); } -- cgit v1.2.3 From 89108cb5c28527c1882df2987394e5c261a1f4aa Mon Sep 17 00:00:00 2001 From: Kyle Swenson Date: Wed, 31 Jul 2024 15:42:14 +0000 Subject: net: pse-pd: tps23881: Fix the device ID check The DEVID register contains two pieces of information: the device ID in the upper nibble, and the silicon revision number in the lower nibble. The driver should work fine with any silicon revision, so let's mask that out in the device ID check. Fixes: 20e6d190ffe1 ("net: pse-pd: Add TI TPS23881 PSE controller driver") Signed-off-by: Kyle Swenson Reviewed-by: Thomas Petazzoni Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20240731154152.4020668-1-kyle.swenson@est.tech Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/tps23881.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/pse-pd/tps23881.c b/drivers/net/pse-pd/tps23881.c index 61f6ad9c1934..f90db758554b 100644 --- a/drivers/net/pse-pd/tps23881.c +++ b/drivers/net/pse-pd/tps23881.c @@ -29,6 +29,8 @@ #define TPS23881_REG_TPON BIT(0) #define TPS23881_REG_FWREV 0x41 #define TPS23881_REG_DEVID 0x43 +#define TPS23881_REG_DEVID_MASK 0xF0 +#define TPS23881_DEVICE_ID 0x02 #define TPS23881_REG_SRAM_CTRL 0x60 #define TPS23881_REG_SRAM_DATA 0x61 @@ -750,7 +752,7 @@ static int tps23881_i2c_probe(struct i2c_client *client) if (ret < 0) return ret; - if (ret != 0x22) { + if (FIELD_GET(TPS23881_REG_DEVID_MASK, ret) != TPS23881_DEVICE_ID) { dev_err(dev, "Wrong device ID\n"); return -ENXIO; } -- cgit v1.2.3 From fba917b169bea5f8f2ee300e19d5f7a6341a5251 Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Thu, 1 Aug 2024 13:56:19 -0700 Subject: gve: Fix use of netif_carrier_ok() GVE driver wrongly relies on netif_carrier_ok() to check the interface administrative state when resources are being allocated/deallocated for queue(s). netif_carrier_ok() needs to be replaced with netif_running() for all such cases. Administrative state is the result of "ip link set dev up/down". It reflects whether the administrator wants to use the device for traffic and the corresponding resources have been allocated. Fixes: 5f08cd3d6423 ("gve: Alloc before freeing when adjusting queues") Signed-off-by: Praveen Kaligineedi Reviewed-by: Shailend Chand Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240801205619.987396-1-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 2 +- drivers/net/ethernet/google/gve/gve_main.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 3480ff5c7ed6..5a8b490ab3ad 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -495,7 +495,7 @@ static int gve_set_channels(struct net_device *netdev, return -EINVAL; } - if (!netif_carrier_ok(netdev)) { + if (!netif_running(netdev)) { priv->tx_cfg.num_queues = new_tx; priv->rx_cfg.num_queues = new_rx; return 0; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9744b426940e..661566db68c8 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1566,7 +1566,7 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, u32 status; old_prog = READ_ONCE(priv->xdp_prog); - if (!netif_carrier_ok(priv->dev)) { + if (!netif_running(priv->dev)) { WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); @@ -1847,7 +1847,7 @@ int gve_adjust_queues(struct gve_priv *priv, rx_alloc_cfg.qcfg = &new_rx_config; tx_alloc_cfg.num_rings = new_tx_config.num_queues; - if (netif_carrier_ok(priv->dev)) { + if (netif_running(priv->dev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } @@ -2064,7 +2064,7 @@ static int gve_set_features(struct net_device *netdev, if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { + if (netif_running(netdev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) goto revert_features; @@ -2359,7 +2359,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -2700,7 +2700,7 @@ static void gve_shutdown(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); rtnl_lock(); if (was_up && gve_close(priv->dev)) { @@ -2718,7 +2718,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); priv->suspend_cnt++; rtnl_lock(); -- cgit v1.2.3 From 14ab4792ee120c022f276a7e4768f4dcb08f0cdd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Thu, 1 Aug 2024 01:13:28 +0100 Subject: net/tcp: Disable TCP-AO static key after RCU grace period The lifetime of TCP-AO static_key is the same as the last tcp_ao_info. On the socket destruction tcp_ao_info ceases to be with RCU grace period, while tcp-ao static branch is currently deferred destructed. The static key definition is : DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ); which means that if RCU grace period is delayed by more than a second and tcp_ao_needed is in the process of disablement, other CPUs may yet see tcp_ao_info which atent dead, but soon-to-be. And that breaks the assumption of static_key_fast_inc_not_disabled(). See the comment near the definition: > * The caller must make sure that the static key can't get disabled while > * in this function. It doesn't patch jump labels, only adds a user to > * an already enabled static key. Originally it was introduced in commit eb8c507296f6 ("jump_label: Prevent key->enabled int overflow"), which is needed for the atomic contexts, one of which would be the creation of a full socket from a request socket. In that atomic context, it's known by the presence of the key (md5/ao) that the static branch is already enabled. So, the ref counter for that static branch is just incremented instead of holding the proper mutex. static_key_fast_inc_not_disabled() is just a helper for such usage case. But it must not be used if the static branch could get disabled in parallel as it's not protected by jump_label_mutex and as a result, races with jump_label_update() implementation details. Happened on netdev test-bot[1], so not a theoretical issue: [] jump_label: Fatal kernel bug, unexpected op at tcp_inbound_hash+0x1a7/0x870 [ffffffffa8c4e9b7] (eb 50 0f 1f 44 != 66 90 0f 1f 00)) size:2 type:1 [] ------------[ cut here ]------------ [] kernel BUG at arch/x86/kernel/jump_label.c:73! [] Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI [] CPU: 3 PID: 243 Comm: kworker/3:3 Not tainted 6.10.0-virtme #1 [] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [] Workqueue: events jump_label_update_timeout [] RIP: 0010:__jump_label_patch+0x2f6/0x350 ... [] Call Trace: [] [] arch_jump_label_transform_queue+0x6c/0x110 [] __jump_label_update+0xef/0x350 [] __static_key_slow_dec_cpuslocked.part.0+0x3c/0x60 [] jump_label_update_timeout+0x2c/0x40 [] process_one_work+0xe3b/0x1670 [] worker_thread+0x587/0xce0 [] kthread+0x28a/0x350 [] ret_from_fork+0x31/0x70 [] ret_from_fork_asm+0x1a/0x30 [] [] Modules linked in: veth [] ---[ end trace 0000000000000000 ]--- [] RIP: 0010:__jump_label_patch+0x2f6/0x350 [1]: https://netdev-3.bots.linux.dev/vmksft-tcp-ao-dbg/results/696681/5-connect-deny-ipv6/stderr Cc: stable@kernel.org Fixes: 67fa83f7c86a ("net/tcp: Add static_key for TCP-AO") Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ao.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 85531437890c..db6516092daf 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -267,32 +267,49 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head) kfree_sensitive(key); } -void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +static void tcp_ao_info_free_rcu(struct rcu_head *head) { - struct tcp_ao_info *ao; + struct tcp_ao_info *ao = container_of(head, struct tcp_ao_info, rcu); struct tcp_ao_key *key; struct hlist_node *n; + hlist_for_each_entry_safe(key, n, &ao->head, node) { + hlist_del(&key->node); + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); + } + kfree(ao); + static_branch_slow_dec_deferred(&tcp_ao_needed); +} + +static void tcp_ao_sk_omem_free(struct sock *sk, struct tcp_ao_info *ao) +{ + size_t total_ao_sk_mem = 0; + struct tcp_ao_key *key; + + hlist_for_each_entry(key, &ao->head, node) + total_ao_sk_mem += tcp_ao_sizeof_key(key); + atomic_sub(total_ao_sk_mem, &sk->sk_omem_alloc); +} + +void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ + struct tcp_ao_info *ao; + if (twsk) { ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1); - tcp_twsk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_twsk(sk)->ao_info, NULL); } else { ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); - tcp_sk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_sk(sk)->ao_info, NULL); } if (!ao || !refcount_dec_and_test(&ao->refcnt)) return; - hlist_for_each_entry_safe(key, n, &ao->head, node) { - hlist_del_rcu(&key->node); - if (!twsk) - atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); - call_rcu(&key->rcu, tcp_ao_key_free_rcu); - } - - kfree_rcu(ao, rcu); - static_branch_slow_dec_deferred(&tcp_ao_needed); + if (!twsk) + tcp_ao_sk_omem_free(sk, ao); + call_rcu(&ao->rcu, tcp_ao_info_free_rcu); } void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp) -- cgit v1.2.3 From b50f2af9fbc5c00103ca8b72752b15310bd77762 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Thu, 1 Aug 2024 21:23:37 +0800 Subject: virtio-net: check feature before configuring the vq coalescing command Virtio spec says: The driver MUST have negotiated the VIRTIO_NET_F_VQ_NOTF_COAL feature when issuing commands VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET and VIRTIO_NET_CTRL_NOTF_COAL_VQ_GET. So we add the feature negotiation check to virtnet_send_{r,t}x_ctrl_coal_vq_cmd as a basis for the next bugfix patch. Suggested-by: Michael S. Tsirkin Signed-off-by: Heng Qi Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0383a3e136d6..b1176be8fcfd 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3658,6 +3658,9 @@ static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), max_usecs, max_packets); if (err) @@ -3675,6 +3678,9 @@ static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), max_usecs, max_packets); if (err) -- cgit v1.2.3 From 4ba8d97083707409822264fd1776aad7233f353e Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Thu, 1 Aug 2024 21:23:38 +0800 Subject: virtio-net: unbreak vq resizing when coalescing is not negotiated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't break the resize action if the vq coalescing feature named VIRTIO_NET_F_VQ_NOTF_COAL is not negotiated. Fixes: f61fe5f081cf ("virtio-net: fix the vq coalescing setting for vq resize") Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Eugenio Pé rez Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b1176be8fcfd..3f10c72743e9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3749,7 +3749,11 @@ static int virtnet_set_ringparam(struct net_device *dev, err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, vi->intr_coal_tx.max_usecs, vi->intr_coal_tx.max_packets); - if (err) + + /* Don't break the tx resize action if the vq coalescing is not + * supported. The same is true for rx resize below. + */ + if (err && err != -EOPNOTSUPP) return err; } @@ -3764,7 +3768,7 @@ static int virtnet_set_ringparam(struct net_device *dev, vi->intr_coal_rx.max_usecs, vi->intr_coal_rx.max_packets); mutex_unlock(&vi->rq[i].dim_lock); - if (err) + if (err && err != -EOPNOTSUPP) return err; } } -- cgit v1.2.3 From 7ab107544b777c3bd7feb9fe447367d8edd5b202 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 1 Aug 2024 15:55:12 +0200 Subject: net: usb: qmi_wwan: fix memory leak for not ip packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free the unused skb when not ip packets arrive. Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support") Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 386d62769ded..cfda32047cff 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -201,6 +201,7 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) break; default: /* not ip - do not know what to do */ + kfree_skb(skbn); goto skip; } -- cgit v1.2.3 From 92c4ee25208d0f35dafc3213cdf355fbe449e078 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 2 Aug 2024 11:07:30 +0300 Subject: net: bridge: mcast: wait for previous gc cycles when removing port syzbot hit a use-after-free[1] which is caused because the bridge doesn't make sure that all previous garbage has been collected when removing a port. What happens is: CPU 1 CPU 2 start gc cycle remove port acquire gc lock first wait for lock call br_multicasg_gc() directly acquire lock now but free port the port can be freed while grp timers still running Make sure all previous gc cycles have finished by using flush_work before freeing the port. [1] BUG: KASAN: slab-use-after-free in br_multicast_port_group_expired+0x4c0/0x550 net/bridge/br_multicast.c:861 Read of size 8 at addr ffff888071d6d000 by task syz.5.1232/9699 CPU: 1 PID: 9699 Comm: syz.5.1232 Not tainted 6.10.0-rc5-syzkaller-00021-g24ca36a562d6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc3/0x620 mm/kasan/report.c:488 kasan_report+0xd9/0x110 mm/kasan/report.c:601 br_multicast_port_group_expired+0x4c0/0x550 net/bridge/br_multicast.c:861 call_timer_fn+0x1a3/0x610 kernel/time/timer.c:1792 expire_timers kernel/time/timer.c:1843 [inline] __run_timers+0x74b/0xaf0 kernel/time/timer.c:2417 __run_timer_base kernel/time/timer.c:2428 [inline] __run_timer_base kernel/time/timer.c:2421 [inline] run_timer_base+0x111/0x190 kernel/time/timer.c:2437 Reported-by: syzbot+263426984509be19c9a0@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=263426984509be19c9a0 Fixes: e12cec65b554 ("net: bridge: mcast: destroy all entries via gc") Signed-off-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20240802080730.3206303-1-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9a1cb5079a7a..b2ae0d2434d2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2045,16 +2045,14 @@ void br_multicast_del_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; - HLIST_HEAD(deleted_head); struct hlist_node *n; /* Take care of the remaining groups, only perm ones should be left */ spin_lock_bh(&br->multicast_lock); hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_find_del_pg(br, pg); - hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); - br_multicast_gc(&deleted_head); + flush_work(&br->mcast_gc_work); br_multicast_port_ctx_deinit(&port->multicast_ctx); free_percpu(port->mcast_stats); } -- cgit v1.2.3 From 3e7917c0cdad835a5121520fc5686d954b7a61ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2024 08:58:21 +0000 Subject: net: linkwatch: use system_unbound_wq linkwatch_event() grabs possibly very contended RTNL mutex. system_wq is not suitable for such work. Inspired by many noisy syzbot reports. 3 locks held by kworker/0:7/5266: #0: ffff888015480948 ((wq_completion)events){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3206 [inline] #0: ffff888015480948 ((wq_completion)events){+.+.}-{0:0}, at: process_scheduled_works+0x90a/0x1830 kernel/workqueue.c:3312 #1: ffffc90003f6fd00 ((linkwatch_work).work){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3207 [inline] , at: process_scheduled_works+0x945/0x1830 kernel/workqueue.c:3312 #2: ffffffff8fa6f208 (rtnl_mutex){+.+.}-{3:3}, at: linkwatch_event+0xe/0x60 net/core/link_watch.c:276 Reported-by: syzbot Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240805085821.1616528-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/link_watch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 8ec35194bfcb..ab150641142a 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -148,9 +148,9 @@ static void linkwatch_schedule_work(int urgent) * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) - mod_delayed_work(system_wq, &linkwatch_work, 0); + mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); else - schedule_delayed_work(&linkwatch_work, delay); + queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); } -- cgit v1.2.3 From c7a19018bd557c24072b59088ad2684fd83ea3f4 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Mon, 5 Aug 2024 16:52:00 -0700 Subject: net: dsa: microchip: Fix Wake-on-LAN check to not return an error The wol variable in ksz_port_set_mac_address() is declared with random data, but the code in ksz_get_wol call may not be executed so the WAKE_MAGIC check may be invalid resulting in an error message when setting a MAC address after starting the DSA driver. Fixes: 3b454b6390c3 ("net: dsa: microchip: ksz9477: Add Wake on Magic Packet support") Signed-off-by: Tristram Ha Reviewed-by: Oleksij Rempel Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240805235200.24982-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b074b4bb0629..b120e66d5669 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -3764,6 +3764,11 @@ static int ksz_port_set_mac_address(struct dsa_switch *ds, int port, return -EBUSY; } + /* Need to initialize variable as the code to fill in settings may + * not be executed. + */ + wol.wolopts = 0; + ksz_get_wol(ds, dp->index, &wol); if (wol.wolopts & WAKE_MAGIC) { dev_err(ds->dev, -- cgit v1.2.3 From 1ca645a2f74a4290527ae27130c8611391b07dbf Mon Sep 17 00:00:00 2001 From: ZHANG Yuntian Date: Sat, 3 Aug 2024 15:46:51 +0800 Subject: net: usb: qmi_wwan: add MeiG Smart SRM825L Add support for MeiG Smart SRM825L which is based on Qualcomm 315 chip. T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2dee ProdID=4d22 Rev= 4.14 S: Manufacturer=MEIG S: Product=LTE-A Module S: SerialNumber=6f345e48 C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: ZHANG Yuntian Link: https://patch.msgid.link/D1EB81385E405DFE+20240803074656.567061-1-yt@radxa.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index cfda32047cff..4823dbdf5465 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1432,6 +1432,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */ + {QMI_FIXED_INTF(0x2dee, 0x4d22, 5)}, /* MeiG Smart SRM825L */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ -- cgit v1.2.3 From 25a7123579ecac9a89a7e5b8d8a580bee4b68acd Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Mon, 15 Jul 2024 17:39:10 +0200 Subject: ice: Fix reset handler Synchronize OICR IRQ when preparing for reset to avoid potential race conditions between the reset procedure and OICR Fixes: 4aad5335969f ("ice: add individual interrupt allocation") Signed-off-by: Grzegorz Nitka Signed-off-by: Sergey Temerkhanov Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 3de020020bc4..6f97ed471fe9 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -559,6 +559,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; + synchronize_irq(pf->oicr_irq.virq); + ice_unplug_aux_dev(pf); /* Notify VFs of impending reset */ -- cgit v1.2.3 From bca515d58367494d8699ab53c645b57b71fb4785 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Mon, 15 Jul 2024 17:39:11 +0200 Subject: ice: Skip PTP HW writes during PTP reset procedure Block HW write access for the driver while the device is in reset to avoid potential race condition and access to the PTP HW in non-nominal state which could lead to undesired effects Fixes: 4aad5335969f ("ice: add individual interrupt allocation") Signed-off-by: Grzegorz Nitka Co-developed-by: Karol Kolacinski Signed-off-by: Karol Kolacinski Signed-off-by: Sergey Temerkhanov Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e2786cc13286..ef2e858f49bb 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1477,6 +1477,10 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; + /* Skip HW writes if reset is in progress */ + if (pf->hw.reset_ongoing) + return; + switch (hw->ptp.phy_model) { case ICE_PHY_E810: /* Do not reconfigure E810 PHY */ -- cgit v1.2.3 From c181da18a7302c5de510fe975a3a333299c6e4b7 Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Fri, 26 Jul 2024 06:19:28 -0400 Subject: ice: Fix incorrect assigns of FEC counts Commit ac21add2540e ("ice: Implement driver functionality to dump fec statistics") introduces obtaining FEC correctable and uncorrectable stats per netdev in ICE driver. Unfortunately the assignment of values to fec_stats structure has been done incorrectly. This commit fixes the assignments. Fixes: ac21add2540e ("ice: Implement driver functionality to dump fec statistics") Reviewed-by: Wojciech Drewek Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8c990c976132..bc79ba974e49 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4673,10 +4673,10 @@ static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, if (err) return err; - fec_stats->uncorrectable_blocks.total = (fec_corr_high_val << 16) + - fec_corr_low_val; - fec_stats->corrected_blocks.total = (fec_uncorr_high_val << 16) + - fec_uncorr_low_val; + fec_stats->corrected_blocks.total = (fec_corr_high_val << 16) + + fec_corr_low_val; + fec_stats->uncorrectable_blocks.total = (fec_uncorr_high_val << 16) + + fec_uncorr_low_val; return 0; } -- cgit v1.2.3 From 541b80216cd1d511841c3c8d9559a7b13f4f79f2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 17:20:48 +0200 Subject: Bluetooth: hci_qca: don't call pwrseq_power_off() twice for QCA6390 Now that we call pwrseq_power_off() for all models that hold a valid power sequencing handle, we can remove the switch case for QCA_6390. The switch will now use the default label for this model but that's fine: if it has the BT-enable GPIO than we should use it. Fixes: eba1718717b0 ("Bluetooth: hci_qca: make pwrseq calls the default if available") Signed-off-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ca6466676902..a20dd5015346 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2187,10 +2187,6 @@ static void qca_power_shutdown(struct hci_uart *hu) } break; - case QCA_QCA6390: - pwrseq_power_off(qcadev->bt_power->pwrseq); - break; - default: gpiod_set_value_cansleep(qcadev->bt_en, 0); } -- cgit v1.2.3 From f3660957303b822e5dd6e10fe9e1e19afc6d33de Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 17:20:49 +0200 Subject: Bluetooth: hci_qca: fix QCA6390 support on non-DT platforms QCA6390 can albo be used on non-DT systems so we must not make the power sequencing the only option. Check if the serdev device consumes an OF node. If so: honor the new contract as per the DT bindings. If not: fall back to the previous behavior by falling through to the existing default label. Fixes: 9a15ce685706 ("Bluetooth: qca: use the power sequencer for QCA6390") Reported-by: Wren Turkal Closes: https://lore.kernel.org/linux-bluetooth/27e6a6c5-fb63-4219-be0b-eefa2c116e06@penguintechs.org/ Signed-off-by: Bartosz Golaszewski Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index a20dd5015346..2baed7d0f479 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2412,11 +2412,14 @@ static int qca_serdev_probe(struct serdev_device *serdev) break; case QCA_QCA6390: - qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, - "bluetooth"); - if (IS_ERR(qcadev->bt_power->pwrseq)) - return PTR_ERR(qcadev->bt_power->pwrseq); - break; + if (dev_of_node(&serdev->dev)) { + qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, + "bluetooth"); + if (IS_ERR(qcadev->bt_power->pwrseq)) + return PTR_ERR(qcadev->bt_power->pwrseq); + break; + } + fallthrough; default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", -- cgit v1.2.3 From e1d28be268cfe978e19a18a3a9b37a4a7d37745e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 17:20:50 +0200 Subject: Bluetooth: hci_qca: fix a NULL-pointer derefence at shutdown Unlike qca_regulator_init(), qca_power_shutdown() may be called for QCA_ROME which does not have qcadev->bt_power assigned. Add a NULL-pointer check before dereferencing the struct qca_power pointer. Fixes: eba1718717b0 ("Bluetooth: hci_qca: make pwrseq calls the default if available") Reported-by: Dmitry Baryshkov Closes: https://lore.kernel.org/linux-bluetooth/su3wp6s44hrxf4ijvsdfzbvv4unu4ycb7kkvwbx6ltdafkldir@4g7ydqm2ap5j/ Signed-off-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 2baed7d0f479..45adc1560d94 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2160,7 +2160,7 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); power = qcadev->bt_power; - if (power->pwrseq) { + if (power && power->pwrseq) { pwrseq_power_off(power->pwrseq); set_bit(QCA_BT_OFF, &qca->flags); return; -- cgit v1.2.3 From c531e63871c0b50c8c4e62c048535a08886fba3e Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 31 Jul 2024 12:19:36 +0300 Subject: Bluetooth: l2cap: always unlock channel in l2cap_conless_channel() Add missing call to 'l2cap_chan_unlock()' on receive error handling path in 'l2cap_conless_channel()'. Fixes: a24cce144b98 ("Bluetooth: Fix reference counting of global L2CAP channels") Reported-by: syzbot+45ac74737e866894acb0@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=45ac74737e866894acb0 Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c3c26bbb5dda..9988ba382b68 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6774,6 +6774,7 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, bt_cb(skb)->l2cap.psm = psm; if (!chan->ops->recv(chan, skb)) { + l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } -- cgit v1.2.3 From b5431dc2803ac159d6d4645ae237d15c3cb252db Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 29 Jul 2024 21:58:10 +0200 Subject: Bluetooth: hci_sync: avoid dup filtering when passive scanning with adv monitor This restores behaviour (including the comment) from now-removed hci_request.c, and also matches existing code for active scanning. Without this, the duplicates filter is always active when passive scanning, which makes it impossible to work with devices that send nontrivial dynamic data in their advertisement reports. Fixes: abfeea476c68 ("Bluetooth: hci_sync: Convert MGMT_OP_START_DISCOVERY") Signed-off-by: Anton Khirnov Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a31d39a821f4..e79cd40bd079 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3019,6 +3019,20 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) } else if (hci_is_adv_monitoring(hdev)) { window = hdev->le_scan_window_adv_monitor; interval = hdev->le_scan_int_adv_monitor; + + /* Disable duplicates filter when scanning for advertisement + * monitor for the following reasons. + * + * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm + * controllers ignore RSSI_Sampling_Period when the duplicates + * filter is enabled. + * + * For SW pattern filtering, when we're not doing interleaved + * scanning, it is necessary to disable duplicates filter, + * otherwise hosts can only receive one advertisement and it's + * impossible to know if a peer is still in range. + */ + filter_dups = LE_SCAN_FILTER_DUP_DISABLE; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; -- cgit v1.2.3 From d27a835f41d947f62e6a95e89ba523299c9e6437 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 5 Aug 2024 12:38:56 +0800 Subject: net/smc: add the max value of fallback reason count The number of fallback reasons defined in the smc_clc.h file has reached 36. For historical reasons, some are no longer quoted, and there's 33 actually in use. So, add the max value of fallback reason count to 36. Fixes: 6ac1e6563f59 ("net/smc: support smc v2.x features validate") Fixes: 7f0620b9940b ("net/smc: support max connections per lgr negotiation") Fixes: 69b888e3bb4b ("net/smc: support max links per lgr negotiation in clc handshake") Signed-off-by: Zhengchao Shao Reviewed-by: Wenjia Zhang Reviewed-by: D. Wythe Link: https://patch.msgid.link/20240805043856.565677-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- net/smc/smc_stats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index 9d32058db2b5..e19177ce4092 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -19,7 +19,7 @@ #include "smc_clc.h" -#define SMC_MAX_FBACK_RSN_CNT 30 +#define SMC_MAX_FBACK_RSN_CNT 36 enum { SMC_BUF_8K, -- cgit v1.2.3 From e3862093ee93fcfbdadcb7957f5f8974fffa806a Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Tue, 6 Aug 2024 10:13:27 +0900 Subject: net: dsa: bcm_sf2: Fix a possible memory leak in bcm_sf2_mdio_register() bcm_sf2_mdio_register() calls of_phy_find_device() and then phy_device_remove() in a loop to remove existing PHY devices. of_phy_find_device() eventually calls bus_find_device(), which calls get_device() on the returned struct device * to increment the refcount. The current implementation does not decrement the refcount, which causes memory leak. This commit adds the missing phy_device_free() call to decrement the refcount via put_device() to balance the refcount. Fixes: 771089c2a485 ("net: dsa: bcm_sf2: Ensure that MDIO diversion is used") Signed-off-by: Joe Hattori Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240806011327.3817861-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index ed1e6560df25..0e663ec0c12a 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -675,8 +675,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) of_remove_property(child, prop); phydev = of_phy_find_device(child); - if (phydev) + if (phydev) { phy_device_remove(phydev); + phy_device_free(phydev); + } } err = mdiobus_register(priv->user_mii_bus); -- cgit v1.2.3 From da03f5d1b2c319a2b74fe76edeadcd8fa5f44376 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 5 Aug 2024 22:37:42 -0700 Subject: bnxt_en : Fix memory out-of-bounds in bnxt_fill_hw_rss_tbl() A recent commit has modified the code in __bnxt_reserve_rings() to set the default RSS indirection table to default only when the number of RX rings is changing. While this works for newer firmware that requires RX ring reservations, it causes the regression on older firmware not requiring RX ring resrvations (BNXT_NEW_RM() returns false). With older firmware, RX ring reservations are not required and so hw_resc->resv_rx_rings is not always set to the proper value. The comparison: if (old_rx_rings != bp->hw_resc.resv_rx_rings) in __bnxt_reserve_rings() may be false even when the RX rings are changing. This will cause __bnxt_reserve_rings() to skip setting the default RSS indirection table to default to match the current number of RX rings. This may later cause bnxt_fill_hw_rss_tbl() to use an out-of-range index. We already have bnxt_check_rss_tbl_no_rmgr() to handle exactly this scenario. We just need to move it up in bnxt_need_reserve_rings() to be called unconditionally when using older firmware. Without the fix, if the TX rings are changing, we'll skip the bnxt_check_rss_tbl_no_rmgr() call and __bnxt_reserve_rings() may also skip the bnxt_set_dflt_rss_indir_tbl() call for the reason explained in the last paragraph. Without setting the default RSS indirection table to default, it causes the regression: BUG: KASAN: slab-out-of-bounds in __bnxt_hwrm_vnic_set_rss+0xb79/0xe40 Read of size 2 at addr ffff8881c5809618 by task ethtool/31525 Call Trace: __bnxt_hwrm_vnic_set_rss+0xb79/0xe40 bnxt_hwrm_vnic_rss_cfg_p5+0xf7/0x460 __bnxt_setup_vnic_p5+0x12e/0x270 __bnxt_open_nic+0x2262/0x2f30 bnxt_open_nic+0x5d/0xf0 ethnl_set_channels+0x5d4/0xb30 ethnl_default_set_doit+0x2f1/0x620 Reported-by: Breno Leitao Closes: https://lore.kernel.org/netdev/ZrC6jpghA3PWVWSB@gmail.com/ Fixes: 98ba1d931f61 ("bnxt_en: Fix RSS logic in __bnxt_reserve_rings()") Reviewed-by: Pavan Chebbi Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Tested-by: Breno Leitao Link: https://patch.msgid.link/20240806053742.140304-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 23f74c6c88b9..e27e1082ee33 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7591,19 +7591,20 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) int rx = bp->rx_nr_rings, stat; int vnic, grp = rx; - if (hw_resc->resv_tx_rings != bp->tx_nr_rings && - bp->hwrm_spec_code >= 0x10601) - return true; - /* Old firmware does not need RX ring reservations but we still * need to setup a default RSS map when needed. With new firmware * we go through RX ring reservations first and then set up the * RSS map for the successfully reserved RX rings when needed. */ - if (!BNXT_NEW_RM(bp)) { + if (!BNXT_NEW_RM(bp)) bnxt_check_rss_tbl_no_rmgr(bp); + + if (hw_resc->resv_tx_rings != bp->tx_nr_rings && + bp->hwrm_spec_code >= 0x10601) + return true; + + if (!BNXT_NEW_RM(bp)) return false; - } vnic = bnxt_get_total_vnics(bp, rx); -- cgit v1.2.3 From f01032a2ca099ec8d619aaa916c3762aa62495df Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 6 Aug 2024 15:09:20 -0700 Subject: idpf: fix memory leaks and crashes while performing a soft reset The second tagged commit introduced a UAF, as it removed restoring q_vector->vport pointers after reinitializating the structures. This is due to that all queue allocation functions are performed here with the new temporary vport structure and those functions rewrite the backpointers to the vport. Then, this new struct is freed and the pointers start leading to nowhere. But generally speaking, the current logic is very fragile. It claims to be more reliable when the system is low on memory, but in fact, it consumes two times more memory as at the moment of running this function, there are two vports allocated with their queues and vectors. Moreover, it claims to prevent the driver from running into "bad state", but in fact, any error during the rebuild leaves the old vport in the partially allocated state. Finally, if the interface is down when the function is called, it always allocates a new queue set, but when the user decides to enable the interface later on, vport_open() allocates them once again, IOW there's a clear memory leak here. Just don't allocate a new queue set when performing a reset, that solves crashes and memory leaks. Readd the old queue number and reopen the interface on rollback - that solves limbo states when the device is left disabled and/or without HW queues enabled. Fixes: 02cbfba1add5 ("idpf: add ethtool callbacks") Fixes: e4891e4687c8 ("idpf: split &idpf_queue into 4 strictly-typed queue structures") Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20240806220923.3359860-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 5dbf2b4ba1b0..10b884dd3475 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1335,9 +1335,8 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) /** * idpf_vport_open - Bring up a vport * @vport: vport to bring up - * @alloc_res: allocate queue resources */ -static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) +static int idpf_vport_open(struct idpf_vport *vport) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; @@ -1350,11 +1349,9 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - if (alloc_res) { - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - } + err = idpf_vport_queues_alloc(vport); + if (err) + return err; err = idpf_vport_intr_alloc(vport); if (err) { @@ -1539,7 +1536,7 @@ void idpf_init_task(struct work_struct *work) np = netdev_priv(vport->netdev); np->state = __IDPF_VPORT_DOWN; if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport, true); + idpf_vport_open(vport); /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created @@ -1898,9 +1895,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, goto free_vport; } - err = idpf_vport_queues_alloc(new_vport); - if (err) - goto free_vport; if (current_state <= __IDPF_VPORT_DOWN) { idpf_send_delete_queues_msg(vport); } else { @@ -1932,17 +1926,23 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, err = idpf_set_real_num_queues(vport); if (err) - goto err_reset; + goto err_open; if (current_state == __IDPF_VPORT_UP) - err = idpf_vport_open(vport, false); + err = idpf_vport_open(vport); kfree(new_vport); return err; err_reset: - idpf_vport_queues_rel(new_vport); + idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq, + vport->num_rxq, vport->num_bufq); + +err_open: + if (current_state == __IDPF_VPORT_UP) + idpf_vport_open(vport); + free_vport: kfree(new_vport); @@ -2171,7 +2171,7 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - err = idpf_vport_open(vport, true); + err = idpf_vport_open(vport); idpf_vport_ctrl_unlock(netdev); -- cgit v1.2.3 From 3cc88e8405b8d55e0ff035e31971aadd6baee2b6 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Tue, 6 Aug 2024 15:09:21 -0700 Subject: idpf: fix memleak in vport interrupt configuration The initialization of vport interrupt consists of two functions: 1) idpf_vport_intr_init() where a generic configuration is done 2) idpf_vport_intr_req_irq() where the irq for each q_vector is requested. The first function used to create a base name for each interrupt using "kasprintf()" call. Unfortunately, although that call allocated memory for a text buffer, that memory was never released. Fix this by removing creating the interrupt base name in 1). Instead, always create a full interrupt name in the function 2), because there is no need to create a base name separately, considering that the function 2) is never called out of idpf_vport_intr_init() context. Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") Cc: stable@vger.kernel.org # 6.7 Signed-off-by: Michal Kubiak Reviewed-by: Pavan Kumar Linga Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20240806220923.3359860-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index af2879f03b8d..a2f9f252694a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3780,13 +3780,15 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) /** * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport * @vport: main vport structure - * @basename: name for the vector */ -static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) +static int idpf_vport_intr_req_irq(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; + const char *drv_name, *if_name, *vec_name; int vector, err, irq_num, vidx; - const char *vec_name; + + drv_name = dev_driver_string(&adapter->pdev->dev); + if_name = netdev_name(vport->netdev); for (vector = 0; vector < vport->num_q_vectors; vector++) { struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; @@ -3804,8 +3806,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) else continue; - name = kasprintf(GFP_KERNEL, "%s-%s-%d", basename, vec_name, - vidx); + name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, + vec_name, vidx); err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, name, q_vector); @@ -4326,7 +4328,6 @@ error: */ int idpf_vport_intr_init(struct idpf_vport *vport) { - char *int_name; int err; err = idpf_vport_intr_init_vec_idx(vport); @@ -4340,11 +4341,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport) if (err) goto unroll_vectors_alloc; - int_name = kasprintf(GFP_KERNEL, "%s-%s", - dev_driver_string(&vport->adapter->pdev->dev), - vport->netdev->name); - - err = idpf_vport_intr_req_irq(vport, int_name); + err = idpf_vport_intr_req_irq(vport); if (err) goto unroll_vectors_alloc; -- cgit v1.2.3 From 290f1c033281c1a502a3cd1c53c3a549259c491f Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 6 Aug 2024 15:09:22 -0700 Subject: idpf: fix UAFs when destroying the queues The second tagged commit started sometimes (very rarely, but possible) throwing WARNs from net/core/page_pool.c:page_pool_disable_direct_recycling(). Turned out idpf frees interrupt vectors with embedded NAPIs *before* freeing the queues making page_pools' NAPI pointers lead to freed memory before these pools are destroyed by libeth. It's not clear whether there are other accesses to the freed vectors when destroying the queues, but anyway, we usually free queue/interrupt vectors only when the queues are destroyed and the NAPIs are guaranteed to not be referenced anywhere. Invert the allocation and freeing logic making queue/interrupt vectors be allocated first and freed last. Vectors don't require queues to be present, so this is safe. Additionally, this change allows to remove that useless queue->q_vector pointer cleanup, as vectors are still valid when freeing the queues (+ both are freed within one function, so it's not clear why nullify the pointers at all). Fixes: 1c325aac10a8 ("idpf: configure resources for TX queues") Fixes: 90912f9f4f2d ("idpf: convert header split mode to libeth + napi_build_skb()") Reported-by: Michal Kubiak Signed-off-by: Alexander Lobakin Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20240806220923.3359860-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 24 ++++++++++++------------ drivers/net/ethernet/intel/idpf/idpf_txrx.c | 24 +----------------------- 2 files changed, 13 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 10b884dd3475..0b6c8fd5bc90 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -900,8 +900,8 @@ static void idpf_vport_stop(struct idpf_vport *vport) vport->link_up = false; idpf_vport_intr_deinit(vport); - idpf_vport_intr_rel(vport); idpf_vport_queues_rel(vport); + idpf_vport_intr_rel(vport); np->state = __IDPF_VPORT_DOWN; } @@ -1349,43 +1349,43 @@ static int idpf_vport_open(struct idpf_vport *vport) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - err = idpf_vport_intr_alloc(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", vport->vport_id, err); - goto queues_rel; + return err; } + err = idpf_vport_queues_alloc(vport); + if (err) + goto intr_rel; + err = idpf_vport_queue_ids_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_vport_intr_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_rx_bufs_init_all(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_queue_reg_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } idpf_rx_init_buf_tail(vport); @@ -1452,10 +1452,10 @@ unmap_queue_vectors: idpf_send_map_unmap_queue_vector_msg(vport, false); intr_deinit: idpf_vport_intr_deinit(vport); -intr_rel: - idpf_vport_intr_rel(vport); queues_rel: idpf_vport_queues_rel(vport); +intr_rel: + idpf_vport_intr_rel(vport); return err; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index a2f9f252694a..585c3dadd9bf 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3576,9 +3576,7 @@ static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) */ void idpf_vport_intr_rel(struct idpf_vport *vport) { - int i, j, v_idx; - - for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; kfree(q_vector->complq); @@ -3593,26 +3591,6 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) free_cpumask_var(q_vector->affinity_mask); } - /* Clean up the mapping of queues to vectors */ - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - - if (idpf_is_queue_model_split(vport->rxq_model)) - for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++) - rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL; - else - for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) - rx_qgrp->singleq.rxqs[j]->q_vector = NULL; - } - - if (idpf_is_queue_model_split(vport->txq_model)) - for (i = 0; i < vport->num_txq_grp; i++) - vport->txq_grps[i].complq->q_vector = NULL; - else - for (i = 0; i < vport->num_txq_grp; i++) - for (j = 0; j < vport->txq_grps[i].num_txq; j++) - vport->txq_grps[i].txqs[j]->q_vector = NULL; - kfree(vport->q_vectors); vport->q_vectors = NULL; } -- cgit v1.2.3 From 85ba108a529d99c82e814eaf782a9443acf5eaed Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 6 Aug 2024 14:08:41 +0100 Subject: net: stmmac: dwmac4: fix PCS duplex mode decode dwmac4 was decoding the duplex mode from the GMAC_PHYIF_CONTROL_STATUS register incorrectly, using GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK (value 1) rather than GMAC_PHYIF_CTRLSTATUS_LNKMOD (bit 16). Fix this. Fixes: 70523e639bf8c ("drivers: net: stmmac: reworking the PCS code.") Reviewed-by: Andrew Halaney Reviewed-by: Andrew Lunn Reviewed-by: Serge Semin Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1sbJvd-001rGD-E3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 2 -- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d3c5306f1c41..93a78fd0737b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -573,8 +573,6 @@ static inline u32 mtl_low_credx_base_addr(const struct dwmac4_addrs *addrs, #define GMAC_PHYIF_CTRLSTATUS_LNKSTS BIT(19) #define GMAC_PHYIF_CTRLSTATUS_JABTO BIT(20) #define GMAC_PHYIF_CTRLSTATUS_FALSECARDET BIT(21) -/* LNKMOD */ -#define GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK 0x1 /* LNKSPEED */ #define GMAC_PHYIF_CTRLSTATUS_SPEED_125 0x2 #define GMAC_PHYIF_CTRLSTATUS_SPEED_25 0x1 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f98741d2607e..31c387cc5f26 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -786,7 +786,7 @@ static void dwmac4_phystatus(void __iomem *ioaddr, struct stmmac_extra_stats *x) else x->pcs_speed = SPEED_10; - x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK); + x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD); pr_info("Link is Up - %d/%s\n", (int)x->pcs_speed, x->pcs_duplex ? "Full" : "Half"); -- cgit v1.2.3 From 86a41ea9fd79ddb6145cb8ebf5aeafceabca6f7d Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 6 Aug 2024 17:06:26 +0100 Subject: l2tp: fix lockdep splat When l2tp tunnels use a socket provided by userspace, we can hit lockdep splats like the below when data is transmitted through another (unrelated) userspace socket which then gets routed over l2tp. This issue was previously discussed here: https://lore.kernel.org/netdev/87sfialu2n.fsf@cloudflare.com/ The solution is to have lockdep treat socket locks of l2tp tunnel sockets separately than those of standard INET sockets. To do so, use a different lockdep subclass where lock nesting is possible. ============================================ WARNING: possible recursive locking detected 6.10.0+ #34 Not tainted -------------------------------------------- iperf3/771 is trying to acquire lock: ffff8881027601d8 (slock-AF_INET/1){+.-.}-{2:2}, at: l2tp_xmit_skb+0x243/0x9d0 but task is already holding lock: ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(slock-AF_INET/1); lock(slock-AF_INET/1); *** DEADLOCK *** May be due to missing lock nesting notation 10 locks held by iperf3/771: #0: ffff888102650258 (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_sendmsg+0x1a/0x40 #1: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0 #2: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130 #3: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x28b/0x9f0 #4: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0xf9/0x260 #5: ffff888102650d98 (slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1848/0x1e10 #6: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x4b/0xbc0 #7: ffffffff822ac220 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x17a/0x1130 #8: ffffffff822ac1e0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0xcc/0x1450 #9: ffff888101f33258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock#2){+...}-{2:2}, at: __dev_queue_xmit+0x513/0x1450 stack backtrace: CPU: 2 UID: 0 PID: 771 Comm: iperf3 Not tainted 6.10.0+ #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x69/0xa0 dump_stack+0xc/0x20 __lock_acquire+0x135d/0x2600 ? srso_alias_return_thunk+0x5/0xfbef5 lock_acquire+0xc4/0x2a0 ? l2tp_xmit_skb+0x243/0x9d0 ? __skb_checksum+0xa3/0x540 _raw_spin_lock_nested+0x35/0x50 ? l2tp_xmit_skb+0x243/0x9d0 l2tp_xmit_skb+0x243/0x9d0 l2tp_eth_dev_xmit+0x3c/0xc0 dev_hard_start_xmit+0x11e/0x420 sch_direct_xmit+0xc3/0x640 __dev_queue_xmit+0x61c/0x1450 ? ip_finish_output2+0xf4c/0x1130 ip_finish_output2+0x6b6/0x1130 ? srso_alias_return_thunk+0x5/0xfbef5 ? __ip_finish_output+0x217/0x380 ? srso_alias_return_thunk+0x5/0xfbef5 __ip_finish_output+0x217/0x380 ip_output+0x99/0x120 __ip_queue_xmit+0xae4/0xbc0 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? tcp_options_write.constprop.0+0xcb/0x3e0 ip_queue_xmit+0x34/0x40 __tcp_transmit_skb+0x1625/0x1890 __tcp_send_ack+0x1b8/0x340 tcp_send_ack+0x23/0x30 __tcp_ack_snd_check+0xa8/0x530 ? srso_alias_return_thunk+0x5/0xfbef5 tcp_rcv_established+0x412/0xd70 tcp_v4_do_rcv+0x299/0x420 tcp_v4_rcv+0x1991/0x1e10 ip_protocol_deliver_rcu+0x50/0x220 ip_local_deliver_finish+0x158/0x260 ip_local_deliver+0xc8/0xe0 ip_rcv+0xe5/0x1d0 ? __pfx_ip_rcv+0x10/0x10 __netif_receive_skb_one_core+0xce/0xe0 ? process_backlog+0x28b/0x9f0 __netif_receive_skb+0x34/0xd0 ? process_backlog+0x28b/0x9f0 process_backlog+0x2cb/0x9f0 __napi_poll.constprop.0+0x61/0x280 net_rx_action+0x332/0x670 ? srso_alias_return_thunk+0x5/0xfbef5 ? find_held_lock+0x2b/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 handle_softirqs+0xda/0x480 ? __dev_queue_xmit+0xa2c/0x1450 do_softirq+0xa1/0xd0 __local_bh_enable_ip+0xc8/0xe0 ? __dev_queue_xmit+0xa2c/0x1450 __dev_queue_xmit+0xa48/0x1450 ? ip_finish_output2+0xf4c/0x1130 ip_finish_output2+0x6b6/0x1130 ? srso_alias_return_thunk+0x5/0xfbef5 ? __ip_finish_output+0x217/0x380 ? srso_alias_return_thunk+0x5/0xfbef5 __ip_finish_output+0x217/0x380 ip_output+0x99/0x120 __ip_queue_xmit+0xae4/0xbc0 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? tcp_options_write.constprop.0+0xcb/0x3e0 ip_queue_xmit+0x34/0x40 __tcp_transmit_skb+0x1625/0x1890 tcp_write_xmit+0x766/0x2fb0 ? __entry_text_end+0x102ba9/0x102bad ? srso_alias_return_thunk+0x5/0xfbef5 ? __might_fault+0x74/0xc0 ? srso_alias_return_thunk+0x5/0xfbef5 __tcp_push_pending_frames+0x56/0x190 tcp_push+0x117/0x310 tcp_sendmsg_locked+0x14c1/0x1740 tcp_sendmsg+0x28/0x40 inet_sendmsg+0x5d/0x90 sock_write_iter+0x242/0x2b0 vfs_write+0x68d/0x800 ? __pfx_sock_write_iter+0x10/0x10 ksys_write+0xc8/0xf0 __x64_sys_write+0x3d/0x50 x64_sys_call+0xfaf/0x1f50 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f4d143af992 Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 89 5c 24 08 0f 05 e9 01 cc ff ff 41 54 b8 02 00 00 0 RSP: 002b:00007ffd65032058 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f4d143af992 RDX: 0000000000000025 RSI: 00007f4d143f3bcc RDI: 0000000000000005 RBP: 00007f4d143f2b28 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4d143f3bcc R13: 0000000000000005 R14: 0000000000000000 R15: 00007ffd650323f0 Fixes: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()") Suggested-by: Eric Dumazet Reported-by: syzbot+6acef9e0a4d1f46c83d4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6acef9e0a4d1f46c83d4 CC: gnault@redhat.com CC: cong.wang@bytedance.com Signed-off-by: James Chapman Signed-off-by: Tom Parkin Link: https://patch.msgid.link/20240806160626.1248317-1-jchapman@katalix.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_core.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c80ab3f26084..2e86f520f799 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -86,6 +86,11 @@ /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 +#define L2TP_DEPTH_NESTING 2 +#if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING +#error "L2TP requires its own lockdep subclass" +#endif + /* Private data stored for received packets in the skb. */ struct l2tp_skb_cb { @@ -1124,7 +1129,13 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); nf_reset_ct(skb); - bh_lock_sock_nested(sk); + /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by + * nested socket calls on the same lockdep socket class. This can + * happen when data from a user socket is routed over l2tp, which uses + * another userspace socket. + */ + spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING); + if (sock_owned_by_user(sk)) { kfree_skb(skb); ret = NET_XMIT_DROP; @@ -1176,7 +1187,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl); out_unlock: - bh_unlock_sock(sk); + spin_unlock(&sk->sk_lock.slock); return ret; } -- cgit v1.2.3 From 9ee09edc05f20422e7ced84b1f8a5d3359926ac8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 6 Aug 2024 10:56:59 -0700 Subject: net: bcmgenet: Properly overlay PHY and MAC Wake-on-LAN capabilities Some Wake-on-LAN modes such as WAKE_FILTER may only be supported by the MAC, while others might be only supported by the PHY. Make sure that the .get_wol() returns the union of both rather than only that of the PHY if the PHY supports Wake-on-LAN. Fixes: 7e400ff35cbe ("net: bcmgenet: Add support for PHY-based Wake-on-LAN") Signed-off-by: Florian Fainelli Link: https://patch.msgid.link/20240806175659.3232204-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 1248792d7fd4..0715ea5bf13e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -42,19 +42,15 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; - if (dev->phydev) { + if (dev->phydev) phy_ethtool_get_wol(dev->phydev, wol); - if (wol->supported) - return; - } - if (!device_can_wakeup(kdev)) { - wol->supported = 0; - wol->wolopts = 0; + /* MAC is not wake-up capable, return what the PHY does */ + if (!device_can_wakeup(kdev)) return; - } - wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; + /* Overlay MAC capabilities with that of the PHY queried before */ + wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; wol->wolopts = priv->wolopts; memset(wol->sopass, 0, sizeof(wol->sopass)); -- cgit v1.2.3 From 8fee6d5ad5fa18c270eedb2a2cdf58dbadefb94b Mon Sep 17 00:00:00 2001 From: "Csókás, Bence" Date: Wed, 7 Aug 2024 10:09:56 +0200 Subject: net: fec: Stop PPS on driver remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PPS was not stopped in `fec_ptp_stop()`, called when the adapter was removed. Consequentially, you couldn't safely reload the driver with the PPS signal on. Fixes: 32cba57ba74b ("net: fec: introduce fec_ptp_stop and use in probe fail path") Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/netdev/CAOMZO5BzcZR8PwKKwBssQq_wAGzVgf1ffwe_nhpQJjviTdxy-w@mail.gmail.com/T/#m01dcb810bfc451a492140f6797ca77443d0cb79f Signed-off-by: Csókás, Bence Reviewed-by: Andrew Lunn Reviewed-by: Frank Li Link: https://patch.msgid.link/20240807080956.2556602-1-csokas.bence@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index e32f6724f568..2e4f3e1782a2 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -775,6 +775,9 @@ void fec_ptp_stop(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + if (fep->pps_enable) + fec_ptp_enable_pps(fep, 0); + cancel_delayed_work_sync(&fep->time_keep); hrtimer_cancel(&fep->perout_timer); if (fep->ptp_clock) -- cgit v1.2.3 From a70b637db15b4de25af3c5946c4399144b3bc241 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Aug 2024 09:54:22 +0200 Subject: net: pse-pd: tps23881: include missing bitfield.h header Using FIELD_GET() fails in configurations that don't already include the header file indirectly: drivers/net/pse-pd/tps23881.c: In function 'tps23881_i2c_probe': drivers/net/pse-pd/tps23881.c:755:13: error: implicit declaration of function 'FIELD_GET' [-Wimplicit-function-declaration] 755 | if (FIELD_GET(TPS23881_REG_DEVID_MASK, ret) != TPS23881_DEVICE_ID) { | ^~~~~~~~~ Fixes: 89108cb5c285 ("net: pse-pd: tps23881: Fix the device ID check") Signed-off-by: Arnd Bergmann Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20240807075455.2055224-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/tps23881.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/pse-pd/tps23881.c b/drivers/net/pse-pd/tps23881.c index f90db758554b..2ea75686a319 100644 --- a/drivers/net/pse-pd/tps23881.c +++ b/drivers/net/pse-pd/tps23881.c @@ -5,6 +5,7 @@ * Copyright (c) 2023 Bootlin, Kory Maincent */ +#include #include #include #include -- cgit v1.2.3 From b54de55990b0467538c6bb33523b28816384958a Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 7 Aug 2024 17:06:12 +0100 Subject: net: ethtool: fix off-by-one error in max RSS context IDs Both ethtool_ops.rxfh_max_context_id and the default value used when it's not specified are supposed to be exclusive maxima (the former is documented as such; the latter, U32_MAX, cannot be used as an ID since it equals ETH_RXFH_CONTEXT_ALLOC), but xa_alloc() expects an inclusive maximum. Subtract one from 'limit' to produce an inclusive maximum, and pass that to xa_alloc(). Increase bnxt's max by one to prevent a (very minor) regression, as BNXT_MAX_ETH_RSS_CTX is an inclusive max. This is safe since bnxt is not actually hard-limited; BNXT_MAX_ETH_RSS_CTX is just a leftover from old driver code that managed context IDs itself. Rename rxfh_max_context_id to rxfh_max_num_contexts to make its semantics (hopefully) more obvious. Fixes: 847a8ab18676 ("net: ethtool: let the core choose RSS context IDs") Signed-off-by: Edward Cree Link: https://patch.msgid.link/5a2d11a599aa5b0cc6141072c01accfb7758650c.1723045898.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- include/linux/ethtool.h | 10 +++++----- net/ethtool/ioctl.c | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index ab8e3f197e7b..9dadc89378f0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -5290,7 +5290,7 @@ void bnxt_ethtool_free(struct bnxt *bp) const struct ethtool_ops bnxt_ethtool_ops = { .cap_link_lanes_supported = 1, .cap_rss_ctx_supported = 1, - .rxfh_max_context_id = BNXT_MAX_ETH_RSS_CTX, + .rxfh_max_num_contexts = BNXT_MAX_ETH_RSS_CTX + 1, .rxfh_indir_space = BNXT_MAX_RSS_TABLE_ENTRIES_P5, .rxfh_priv_size = sizeof(struct bnxt_rss_ctx), .supported_coalesce_params = ETHTOOL_COALESCE_USECS | diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 303fda54ef17..989c94eddb2b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -736,10 +736,10 @@ struct kernel_ethtool_ts_info { * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). - * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this - * is zero then the core may choose any (nonzero) ID, otherwise the core - * will only use IDs strictly less than this value, as the @rss_context - * argument to @create_rxfh_context and friends. + * @rxfh_max_num_contexts: maximum (exclusive) supported RSS context ID. + * If this is zero then the core may choose any (nonzero) ID, otherwise + * the core will only use IDs strictly less than this value, as the + * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -954,7 +954,7 @@ struct ethtool_ops { u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; - u32 rxfh_max_context_id; + u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 8ca13208d240..a8e276ecf723 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1449,12 +1449,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } if (ops->create_rxfh_context) { - u32 limit = ops->rxfh_max_context_id ?: U32_MAX; + u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX; u32 ctx_id; /* driver uses new API, core allocates ID */ ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, - XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT); + XA_LIMIT(1, limit - 1), + GFP_KERNEL_ACCOUNT); if (ret < 0) { kfree(ctx); goto out; -- cgit v1.2.3 From 4d7c3c1aba3ca12fad2e90163b8d5153363f93e5 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 7 Aug 2024 20:33:52 +0300 Subject: ethtool: Fix context creation with no parameters The 'at least one change' requirement is not applicable for context creation, skip the check in such case. This allows a command such as 'ethtool -X eth0 context new' to work. The command works by mistake when using older versions of userspace ethtool due to an incompatibility issue where rxfh.input_xfrm is passed as zero (unset) instead of RXH_XFRM_NO_CHANGE as done with recent userspace. This patch does not try to solve the incompatibility issue. Link: https://lore.kernel.org/netdev/05ae8316-d3aa-4356-98c6-55ed4253c8a7@nvidia.com/ Fixes: 84a1d9c48200 ("net: ethtool: extend RXNFC API to support RSS spreading of filter matches") Reviewed-by: Dragos Tatulea Reviewed-by: Jianbo Liu Signed-off-by: Gal Pressman Reviewed-by: Edward Cree Link: https://patch.msgid.link/20240807173352.3501746-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index a8e276ecf723..e18823bf2330 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1369,14 +1369,17 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; - /* If either indir, hash key or function is valid, proceed further. - * Must request at least one change: indir size, hash key, function - * or input transformation. - */ if ((rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && rxfh.indir_size != dev_indir_size) || - (rxfh.key_size && (rxfh.key_size != dev_key_size)) || + (rxfh.key_size && rxfh.key_size != dev_key_size)) + return -EINVAL; + + /* Must request at least one change: indir size, hash key, function + * or input transformation. + * There's no need for any of it in case of context creation. + */ + if (!create && (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE && rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) -- cgit v1.2.3 From 0411f73c13afcf619d7aa7546edbc5710a871cae Mon Sep 17 00:00:00 2001 From: Martin Whitaker Date: Wed, 7 Aug 2024 21:52:09 +0100 Subject: net: dsa: microchip: disable EEE for KSZ8567/KSZ9567/KSZ9896/KSZ9897. As noted in the device errata [1-8], EEE support is not fully operational in the KSZ8567, KSZ9477, KSZ9567, KSZ9896, and KSZ9897 devices, causing link drops when connected to another device that supports EEE. The patch series "net: add EEE support for KSZ9477 switch family" merged in commit 9b0bf4f77162 caused EEE support to be enabled in these devices. A fix for this regression for the KSZ9477 alone was merged in commit 08c6d8bae48c2. This patch extends this fix to the other affected devices. [1] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ8567R-Errata-DS80000752.pdf [2] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ8567S-Errata-DS80000753.pdf [3] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9477S-Errata-DS80000754.pdf [4] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9567R-Errata-DS80000755.pdf [5] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9567S-Errata-DS80000756.pdf [6] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9896C-Errata-DS80000757.pdf [7] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9897R-Errata-DS80000758.pdf [8] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/KSZ9897S-Errata-DS80000759.pdf Fixes: 69d3b36ca045 ("net: dsa: microchip: enable EEE support") # for KSZ8567/KSZ9567/KSZ9896/KSZ9897 Link: https://lore.kernel.org/netdev/137ce1ee-0b68-4c96-a717-c8164b514eec@martin-whitaker.me.uk/ Signed-off-by: Martin Whitaker Acked-by: Arun Ramadoss Reviewed-by: Oleksij Rempel Reviewed-by: Lukasz Majewski Link: https://patch.msgid.link/20240807205209.21464-1-foss@martin-whitaker.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b120e66d5669..1491099528be 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2578,7 +2578,11 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) if (!port) return MICREL_KSZ8_P1_ERRATA; break; + case KSZ8567_CHIP_ID: case KSZ9477_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: /* KSZ9477 Errata DS80000754C * * Module 4: Energy Efficient Ethernet (EEE) feature select must @@ -2588,6 +2592,13 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) * controls. If not disabled, the PHY ports can auto-negotiate * to enable EEE, and this feature can cause link drops when * linked to another device supporting EEE. + * + * The same item appears in the errata for the KSZ9567, KSZ9896, + * and KSZ9897. + * + * A similar item appears in the errata for the KSZ8567, but + * provides an alternative workaround. For now, use the simple + * workaround of disabling the EEE feature for this device too. */ return MICREL_NO_EEE; } -- cgit v1.2.3