[ICSK]: Move generalised functions from tcp to inet_connection_sock

This also improves reqsk_queue_prune and renames it to inet_csk_reqsk_queue_prune, as it deals with both inet_connection_sock and inet_request_sock objects, not just with request_sock ones thus belonging to inet_request_sock. Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> 2005-08-09 20:15:09 -0700
committer: David S. Miller <davem@sunset.davemloft.net> 2005-08-29 15:49:50 -0700
commit: a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3 (patch)
tree: f82f0523c313228d64998fac30790edcfd0785c3 /net/ipv4/inet_connection_sock.c
parent: 7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c (diff)
download: lwn-a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3.tar.gz
lwn-a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3.zip
1 files changed, 214 insertions, 0 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 136ada050b63..026630a15ea0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -23,6 +23,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/xfrm.h>
 
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -398,8 +399,100 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 	inet_csk_reqsk_queue_added(sk, timeout);
 }
 
+/* Only thing we need from tcp.h */
+extern int sysctl_tcp_synack_retries;
+
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
+void inet_csk_reqsk_queue_prune(struct sock *parent,
+				const unsigned long interval,
+				const unsigned long timeout,
+				const unsigned long max_rto)
+{
+	struct inet_connection_sock *icsk = inet_csk(parent);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+	struct listen_sock *lopt = queue->listen_opt;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
+	unsigned long now = jiffies;
+	struct request_sock **reqp, *req;
+	int i, budget;
+
+	if (lopt == NULL || lopt->qlen == 0)
+		return;
+
+	/* Normally all the openreqs are young and become mature
+	 * (i.e. converted to established socket) for first timeout.
+	 * If synack was not acknowledged for 3 seconds, it means
+	 * one of the following things: synack was lost, ack was lost,
+	 * rtt is high or nobody planned to ack (i.e. synflood).
+	 * When server is a bit loaded, queue is populated with old
+	 * open requests, reducing effective size of queue.
+	 * When server is well loaded, queue size reduces to zero
+	 * after several minutes of work. It is not synflood,
+	 * it is normal operation. The solution is pruning
+	 * too old entries overriding normal timeout, when
+	 * situation becomes dangerous.
+	 *
+	 * Essentially, we reserve half of room for young
+	 * embrions; and abort old ones without pity, if old
+	 * ones are about to clog our table.
+	 */
+	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
+		int young = (lopt->qlen_young<<1);
+
+		while (thresh > 2) {
+			if (lopt->qlen < young)
+				break;
+			thresh--;
+			young <<= 1;
+		}
+	}
+
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
+	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
+	i = lopt->clock_hand;
+
+	do {
+		reqp=&lopt->syn_table[i];
+		while ((req = *reqp) != NULL) {
+			if (time_after_eq(now, req->expires)) {
+				if ((req->retrans < thresh ||
+				     (inet_rsk(req)->acked && req->retrans < max_retries))
+				    && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
+					unsigned long timeo;
+
+					if (req->retrans++ == 0)
+						lopt->qlen_young--;
+					timeo = min((timeout << req->retrans), max_rto);
+					req->expires = now + timeo;
+					reqp = &req->dl_next;
+					continue;
+				}
+
+				/* Drop this request */
+				inet_csk_reqsk_queue_unlink(parent, req, reqp);
+				reqsk_queue_removed(queue, req);
+				reqsk_free(req);
+				continue;
+			}
+			reqp = &req->dl_next;
+		}
+
+		i = (i + 1) & (lopt->nr_table_entries - 1);
+
+	} while (--budget > 0);
+
+	lopt->clock_hand = i;
+
+	if (lopt->qlen)
+		inet_csk_reset_keepalive_timer(parent, interval);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
+
 struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 			    const unsigned int __nocast priority)
 {
@@ -424,3 +517,124 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_clone);
+
+/*
+ * At this point, there should be no process reference to this
+ * socket, and thus no user references at all.  Therefore we
+ * can assume the socket waitqueue is inactive and nobody will
+ * try to jump onto it.
+ */
+void inet_csk_destroy_sock(struct sock *sk)
+{
+	BUG_TRAP(sk->sk_state == TCP_CLOSE);
+	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
+
+	/* It cannot be in hash table! */
+	BUG_TRAP(sk_unhashed(sk));
+
+	/* If it has not 0 inet_sk(sk)->num, it must be bound */
+	BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
+
+	sk->sk_prot->destroy(sk);
+
+	sk_stream_kill_queues(sk);
+
+	xfrm_sk_free_policy(sk);
+
+	sk_refcnt_debug_release(sk);
+
+	atomic_dec(sk->sk_prot->orphan_count);
+	sock_put(sk);
+}
+
+EXPORT_SYMBOL(inet_csk_destroy_sock);
+
+int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
+
+	if (rc != 0)
+		return rc;
+
+	sk->sk_max_ack_backlog = 0;
+	sk->sk_ack_backlog = 0;
+	inet_csk_delack_init(sk);
+
+	/* There is race window here: we announce ourselves listening,
+	 * but this transition is still not validated by get_port().
+	 * It is OK, because this socket enters to hash table only
+	 * after validation is complete.
+	 */
+	sk->sk_state = TCP_LISTEN;
+	if (!sk->sk_prot->get_port(sk, inet->num)) {
+		inet->sport = htons(inet->num);
+
+		sk_dst_reset(sk);
+		sk->sk_prot->hash(sk);
+
+		return 0;
+	}
+
+	sk->sk_state = TCP_CLOSE;
+	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
+	return -EADDRINUSE;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_start);
+
+/*
+ *	This routine closes sockets which have been at least partially
+ *	opened, but not yet accepted.
+ */
+void inet_csk_listen_stop(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock *acc_req;
+	struct request_sock *req;
+
+	inet_csk_delete_keepalive_timer(sk);
+
+	/* make all the listen_opt local to us */
+	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+
+	/* Following specs, it would be better either to send FIN
+	 * (and enter FIN-WAIT-1, it is normal close)
+	 * or to send active reset (abort).
+	 * Certainly, it is pretty dangerous while synflood, but it is
+	 * bad justification for our negligence 8)
+	 * To be honest, we are not able to make either
+	 * of the variants now.			--ANK
+	 */
+	reqsk_queue_destroy(&icsk->icsk_accept_queue);
+
+	while ((req = acc_req) != NULL) {
+		struct sock *child = req->sk;
+
+		acc_req = req->dl_next;
+
+		local_bh_disable();
+		bh_lock_sock(child);
+		BUG_TRAP(!sock_owned_by_user(child));
+		sock_hold(child);
+
+		sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+		sock_orphan(child);
+
+		atomic_inc(sk->sk_prot->orphan_count);
+
+		inet_csk_destroy_sock(child);
+
+		bh_unlock_sock(child);
+		local_bh_enable();
+		sock_put(child);
+
+		sk_acceptq_removed(sk);
+		__reqsk_free(req);
+	}
+	BUG_TRAP(!sk->sk_ack_backlog);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
author	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>	2005-08-09 20:15:09 -0700
committer	David S. Miller <davem@sunset.davemloft.net>	2005-08-29 15:49:50 -0700
commit	a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3 (patch)
tree	f82f0523c313228d64998fac30790edcfd0785c3 /net/ipv4/inet_connection_sock.c
parent	7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c (diff)
download	lwn-a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3.tar.gz lwn-a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3.zip