author      Eric Dumazet <eric.dumazet@gmail.com>            2010-04-27 15:13:20 -0700
committer   Paul Gortmaker <paul.gortmaker@windriver.com>    2012-03-14 10:58:05 -0400
commit      a31deff4d20232dfc8370f0320e8b0925f5655b0 (patch)
tree        4e41be5e40ec6982ee4b6f8d11ec6a9b6bd56998
parent      56defd4dc08f361bdf74c6cb457aebb1ccf442d1 (diff)
net: sk_add_backlog() take rmem_alloc into account
commit c377411f2494a931ff7facdbb3a6839b1266bcf6 upstream.
The current socket backlog limit is not enough to really stop DDoS attacks,
because the user thread spends a lot of time processing a full backlog on
each round, and may end up spinning madly on the socket lock.
We should add the backlog size and the receive_queue size (aka rmem_alloc)
together to pace writers, and let the user thread run without being slowed
down too much.
Introduce a sk_rcvqueues_full() helper to avoid taking the socket lock in
stress situations.
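
For illustration only, here is a minimal userspace sketch of the accounting the new helper performs. The mock_sock structure, its field names and the 16 KB / 2 KB sizes are invented stand-ins, not the kernel's types; only the arithmetic mirrors the sk_rcvqueues_full() helper added by this patch.

#include <stdbool.h>
#include <stdio.h>

/* Invented stand-in for the few struct sock fields the check looks at. */
struct mock_sock {
        unsigned int backlog_len;       /* bytes queued in sk_backlog */
        unsigned int rmem_alloc;        /* bytes queued in sk_receive_queue */
        unsigned int rcvbuf;            /* SO_RCVBUF limit */
};

/* Same arithmetic as sk_rcvqueues_full(): backlog + receive queue + packet. */
static bool rcvqueues_full(const struct mock_sock *sk, unsigned int truesize)
{
        unsigned int qsize = sk->backlog_len + sk->rmem_alloc;

        return qsize + truesize > sk->rcvbuf;
}

int main(void)
{
        struct mock_sock sk = { .backlog_len = 0, .rmem_alloc = 0, .rcvbuf = 16384 };
        unsigned int truesize = 2048;   /* per-packet charge, made up for the demo */
        unsigned int delivered = 0, dropped = 0;

        /* Burst of packets while the receiving thread never drains the queues. */
        for (int i = 0; i < 32; i++) {
                if (rcvqueues_full(&sk, truesize)) {
                        dropped++;      /* rejected before the socket lock is taken */
                } else {
                        sk.rmem_alloc += truesize;
                        delivered++;
                }
        }
        printf("delivered=%u dropped=%u\n", delivered, dropped);
        return 0;
}
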
Under huge stress from a multiqueue/RPS-enabled NIC, a single-flow UDP
receiver can now process ~200,000 pps (instead of ~100 pps before the
patch) on an 8-core machine.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
-rw-r--r--  include/net/sock.h | 13
-rw-r--r--  net/core/sock.c    |  5
-rw-r--r--  net/ipv4/udp.c     |  4
-rw-r--r--  net/ipv6/udp.c     |  8
-rw-r--r--  net/sctp/socket.c  |  3

5 files changed, 27 insertions, 6 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 721c7b3a7a73..b365fc2597c3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -255,7 +255,6 @@ struct sock {
                 struct sk_buff *head;
                 struct sk_buff *tail;
                 int len;
-                int limit;
         } sk_backlog;
         wait_queue_head_t *sk_sleep;
         struct dst_entry *sk_dst_cache;
@@ -604,10 +603,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
         skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+        unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+        return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
 /* The per-socket spinlock must be held here. */
 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-        if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+        if (sk_rcvqueues_full(sk, skb))
                 return -ENOBUFS;
 
         __sk_add_backlog(sk, skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index cf3b9aa3f264..78b708780d30 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
         skb->dev = NULL;
 
+        if (sk_rcvqueues_full(sk, skb)) {
+                atomic_inc(&sk->sk_drops);
+                goto discard_and_relse;
+        }
         if (nested)
                 bh_lock_sock_nested(sk);
         else
@@ -1885,7 +1889,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
         sk->sk_allocation = GFP_KERNEL;
         sk->sk_rcvbuf = sysctl_rmem_default;
         sk->sk_sndbuf = sysctl_wmem_default;
-        sk->sk_backlog.limit = sk->sk_rcvbuf << 1;
         sk->sk_state = TCP_CLOSE;
         sk_set_socket(sk, sock);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fd510bcdc1c2..ff6a18ea8151 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1410,6 +1410,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
                         goto drop;
         }
 
+
+        if (sk_rcvqueues_full(sk, skb))
+                goto drop;
+
         rc = 0;
 
         bh_lock_sock(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 402a61cd8ef7..479f9266a199 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -590,6 +590,10 @@ static void flush_stack(struct sock **stack, unsigned int count,
 
                 sk = stack[i];
                 if (skb1) {
+                        if (sk_rcvqueues_full(sk, skb)) {
+                                kfree_skb(skb1);
+                                goto drop;
+                        }
                         bh_lock_sock(sk);
                         if (!sock_owned_by_user(sk))
                                 udpv6_queue_rcv_skb(sk, skb1);
@@ -765,6 +769,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 
         /* deliver */
 
+        if (sk_rcvqueues_full(sk, skb)) {
+                sock_put(sk);
+                goto discard;
+        }
         bh_lock_sock(sk);
         if (!sock_owned_by_user(sk))
                 udpv6_queue_rcv_skb(sk, skb);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7f28df58d251..8375609fc423 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3720,9 +3720,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 
         SCTP_DBG_OBJCNT_INC(sock);
 
-        /* Set socket backlog limit. */
-        sk->sk_backlog.limit = sysctl_sctp_rmem[1];
-
         local_bh_disable();
         percpu_counter_inc(&sctp_sockets_allocated);
         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
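
As a rough illustration of why the combined limit matters, the standalone sketch below contrasts the old admission test (backlog length against max(sk_backlog.limit, sk_rcvbuf << 1)) with the new one from this patch. The struct, its field names and the 16 KB / 2 KB sizes are made up for the example and are not kernel code; only the two comparison expressions mirror the patch.

#include <stdbool.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Invented stand-in for struct sock; only the fields used by the checks. */
struct mock_sock {
        unsigned int backlog_len;       /* bytes sitting in sk_backlog */
        unsigned int backlog_limit;     /* old sk->sk_backlog.limit */
        unsigned int rmem_alloc;        /* bytes sitting in sk_receive_queue */
        unsigned int rcvbuf;            /* SO_RCVBUF */
};

/* Old test: only the backlog itself is bounded. */
static bool old_backlog_full(const struct mock_sock *sk)
{
        return sk->backlog_len >= MAX(sk->backlog_limit, sk->rcvbuf << 1);
}

/* New test: backlog plus receive queue plus the incoming packet. */
static bool new_backlog_full(const struct mock_sock *sk, unsigned int truesize)
{
        return sk->backlog_len + sk->rmem_alloc + truesize > sk->rcvbuf;
}

int main(void)
{
        /* Receive queue already holds a full SO_RCVBUF worth of data. */
        struct mock_sock sk = {
                .backlog_len   = 0,
                .backlog_limit = 16384 << 1,
                .rmem_alloc    = 16384,
                .rcvbuf        = 16384,
        };

        printf("old check drops the packet: %s\n",
               old_backlog_full(&sk) ? "yes" : "no");
        printf("new check drops the packet: %s\n",
               new_backlog_full(&sk, 2048) ? "yes" : "no");
        return 0;
}

With a receive queue already at the SO_RCVBUF limit, the old check still admits packets into the backlog, while the new check drops them before the socket lock is ever taken.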