From 96628951869c0dedf0377adca01c8675172d8639 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 8 Sep 2022 16:15:23 -0700 Subject: tcp: Use WARN_ON_ONCE() in tcp_read_skb() Prevent tcp_read_skb() from flooding the syslog. Suggested-by: Jakub Sitnicki Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6cdfce6f2867..3488388eea5d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1766,7 +1766,7 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) return 0; __skb_unlink(skb, &sk->sk_receive_queue); - WARN_ON(!skb_set_owner_sk_safe(skb, sk)); + WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); copied = recv_actor(sk, skb); if (copied >= 0) { seq += copied; -- cgit v1.2.3 From db4192a754ebd52300a28abe1a50dd18eae0eb12 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 12 Sep 2022 10:35:53 -0700 Subject: tcp: read multiple skbs in tcp_read_skb() Before we switched to ->read_skb(), ->read_sock() was passed with desc.count=1, which technically indicates we only read one skb per ->sk_data_ready() call. However, for TCP, this is not true. TCP at least has sk_rcvlowat which intentionally holds skb's in receive queue until this watermark is reached. This means when ->sk_data_ready() is invoked there could be multiple skb's in the queue, therefore we have to read multiple skbs in tcp_read_skb() instead of one. Fixes: 965b57b469a5 ("net: Introduce a new proto_ops ->read_skb()") Reported-by: Peilin Ye Cc: John Fastabend Cc: Jakub Sitnicki Cc: Eric Dumazet Signed-off-by: Cong Wang Link: https://lore.kernel.org/r/20220912173553.235838-1-xiyou.wangcong@gmail.com Signed-off-by: Paolo Abeni --- net/ipv4/tcp.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3488388eea5d..e373dde1f46f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1761,19 +1761,28 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) if (sk->sk_state == TCP_LISTEN) return -ENOTCONN; - skb = tcp_recv_skb(sk, seq, &offset); - if (!skb) - return 0; + while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { + u8 tcp_flags; + int used; - __skb_unlink(skb, &sk->sk_receive_queue); - WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); - copied = recv_actor(sk, skb); - if (copied >= 0) { - seq += copied; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + __skb_unlink(skb, &sk->sk_receive_queue); + WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); + tcp_flags = TCP_SKB_CB(skb)->tcp_flags; + used = recv_actor(sk, skb); + consume_skb(skb); + if (used < 0) { + if (!copied) + copied = used; + break; + } + seq += used; + copied += used; + + if (tcp_flags & TCPHDR_FIN) { ++seq; + break; + } } - consume_skb(skb); WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); -- cgit v1.2.3 From b07a9b26e2b1aa3711fd6935eccb08a463b1fb11 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 14 Sep 2022 10:53:38 +0300 Subject: ipmr: Always call ip{,6}_mr_forward() from RCU read-side critical section These functions expect to be called from RCU read-side critical section, but this only happens when invoked from the data path via ip{,6}_mr_input(). They can also be invoked from process context in response to user space adding a multicast route which resolves a cache entry with queued packets [1][2]. Fix by adding missing rcu_read_lock() / rcu_read_unlock() in these call paths. [1] WARNING: suspicious RCU usage 6.0.0-rc3-custom-15969-g049d233c8bcc-dirty #1387 Not tainted ----------------------------- net/ipv4/ipmr.c:84 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by smcrouted/246: #0: ffffffff862389b0 (rtnl_mutex){+.+.}-{3:3}, at: ip_mroute_setsockopt+0x11c/0x1420 stack backtrace: CPU: 0 PID: 246 Comm: smcrouted Not tainted 6.0.0-rc3-custom-15969-g049d233c8bcc-dirty #1387 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014 Call Trace: dump_stack_lvl+0x91/0xb9 vif_dev_read+0xbf/0xd0 ipmr_queue_xmit+0x135/0x1ab0 ip_mr_forward+0xe7b/0x13d0 ipmr_mfc_add+0x1a06/0x2ad0 ip_mroute_setsockopt+0x5c1/0x1420 do_ip_setsockopt+0x23d/0x37f0 ip_setsockopt+0x56/0x80 raw_setsockopt+0x219/0x290 __sys_setsockopt+0x236/0x4d0 __x64_sys_setsockopt+0xbe/0x160 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [2] WARNING: suspicious RCU usage 6.0.0-rc3-custom-15969-g049d233c8bcc-dirty #1387 Not tainted ----------------------------- net/ipv6/ip6mr.c:69 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by smcrouted/246: #0: ffffffff862389b0 (rtnl_mutex){+.+.}-{3:3}, at: ip6_mroute_setsockopt+0x6b9/0x2630 stack backtrace: CPU: 1 PID: 246 Comm: smcrouted Not tainted 6.0.0-rc3-custom-15969-g049d233c8bcc-dirty #1387 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014 Call Trace: dump_stack_lvl+0x91/0xb9 vif_dev_read+0xbf/0xd0 ip6mr_forward2.isra.0+0xc9/0x1160 ip6_mr_forward+0xef0/0x13f0 ip6mr_mfc_add+0x1ff2/0x31f0 ip6_mroute_setsockopt+0x1825/0x2630 do_ipv6_setsockopt+0x462/0x4440 ipv6_setsockopt+0x105/0x140 rawv6_setsockopt+0xd8/0x690 __sys_setsockopt+0x236/0x4d0 __x64_sys_setsockopt+0xbe/0x160 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: ebc3197963fc ("ipmr: add rcu protection over (struct vif_device)->dev") Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 2 ++ net/ipv6/ip6mr.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 73651d17e51f..e11d6b0b62b7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1004,7 +1004,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { + rcu_read_lock(); ip_mr_forward(net, mrt, skb->dev, skb, c, 0); + rcu_read_unlock(); } } } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index a9ba41648e36..858fd8a28b5b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1028,8 +1028,11 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); - } else + } else { + rcu_read_lock(); ip6_mr_forward(net, mrt, skb->dev, skb, c); + rcu_read_unlock(); + } } } -- cgit v1.2.3 From db39dfdc1c3bd9da273902da12f01973792ff911 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 20 Sep 2022 17:59:15 -0700 Subject: udp: Use WARN_ON_ONCE() in udp_read_skb() Prevent udp_read_skb() from flooding the syslog. Suggested-by: Jakub Sitnicki Signed-off-by: Peilin Ye Link: https://lore.kernel.org/r/20220921005915.2697-1-yepeilin.cs@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cd72158e953a..560d9eadeaa5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1821,7 +1821,7 @@ int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) continue; } - WARN_ON(!skb_set_owner_sk_safe(skb, sk)); + WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); used = recv_actor(sk, skb); if (used <= 0) { if (!copied) -- cgit v1.2.3