summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-04-27 15:13:20 -0700
committerPaul Gortmaker <paul.gortmaker@windriver.com>2012-03-14 10:58:05 -0400
commita31deff4d20232dfc8370f0320e8b0925f5655b0 (patch)
tree4e41be5e40ec6982ee4b6f8d11ec6a9b6bd56998
parent56defd4dc08f361bdf74c6cb457aebb1ccf442d1 (diff)
downloadlwn-a31deff4d20232dfc8370f0320e8b0925f5655b0.tar.gz
lwn-a31deff4d20232dfc8370f0320e8b0925f5655b0.zip
net: sk_add_backlog() take rmem_alloc into account
commit c377411f2494a931ff7facdbb3a6839b1266bcf6 upstream. The current socket backlog limit is not enough to really stop DDoS attacks, because the user thread spends a lot of time processing a full backlog each round, and the user might spin heavily on the socket lock. We should add the backlog size and the receive_queue size (aka rmem_alloc) together to pace writers, and let the user run without being slowed down too much. Introduce a sk_rcvqueues_full() helper, to avoid taking the socket lock in stress situations. Under huge stress from a multiqueue/RPS-enabled NIC, a single-flow UDP receiver can now process ~200,000 pps (instead of ~100 pps before the patch) on an 8-core machine. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
-rw-r--r--include/net/sock.h13
-rw-r--r--net/core/sock.c5
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv6/udp.c8
-rw-r--r--net/sctp/socket.c3
5 files changed, 27 insertions, 6 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 721c7b3a7a73..b365fc2597c3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -255,7 +255,6 @@ struct sock {
struct sk_buff *head;
struct sk_buff *tail;
int len;
- int limit;
} sk_backlog;
wait_queue_head_t *sk_sleep;
struct dst_entry *sk_dst_cache;
@@ -604,10 +603,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
skb->next = NULL;
}
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+ unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+ return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
/* The per-socket spinlock must be held here. */
static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
{
- if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+ if (sk_rcvqueues_full(sk, skb))
return -ENOBUFS;
__sk_add_backlog(sk, skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index cf3b9aa3f264..78b708780d30 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
skb->dev = NULL;
+ if (sk_rcvqueues_full(sk, skb)) {
+ atomic_inc(&sk->sk_drops);
+ goto discard_and_relse;
+ }
if (nested)
bh_lock_sock_nested(sk);
else
@@ -1885,7 +1889,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
- sk->sk_backlog.limit = sk->sk_rcvbuf << 1;
sk->sk_state = TCP_CLOSE;
sk_set_socket(sk, sock);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fd510bcdc1c2..ff6a18ea8151 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1410,6 +1410,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto drop;
}
+
+ if (sk_rcvqueues_full(sk, skb))
+ goto drop;
+
rc = 0;
bh_lock_sock(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 402a61cd8ef7..479f9266a199 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -590,6 +590,10 @@ static void flush_stack(struct sock **stack, unsigned int count,
sk = stack[i];
if (skb1) {
+ if (sk_rcvqueues_full(sk, skb)) {
+ kfree_skb(skb1);
+ goto drop;
+ }
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
udpv6_queue_rcv_skb(sk, skb1);
@@ -765,6 +769,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
/* deliver */
+ if (sk_rcvqueues_full(sk, skb)) {
+ sock_put(sk);
+ goto discard;
+ }
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
udpv6_queue_rcv_skb(sk, skb);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7f28df58d251..8375609fc423 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3720,9 +3720,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
SCTP_DBG_OBJCNT_INC(sock);
- /* Set socket backlog limit. */
- sk->sk_backlog.limit = sysctl_sctp_rmem[1];
-
local_bh_disable();
percpu_counter_inc(&sctp_sockets_allocated);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);