[INET]: Remove per bucket rwlock in tcp/dccp ehash table.

As done two years ago on the IP route cache table (commit 22c047ccbc68fa8f3fa57f0e8f906479a062c426), we can avoid using one lock per hash bucket for the huge TCP/DCCP hash tables. On a typical x86_64 platform, this saves about 2MB or 4MB of RAM, for little performance difference. (We hit a different cache line for the rwlock, but then the bucket cache line has a better sharing factor among CPUs, since we dirty it less often.) For netstat or ss commands that want a full scan of the hash table, we perform fewer memory accesses. Using a 'small' table of hashed rwlocks should be more than enough to provide correct SMP concurrency between different buckets, without using too much memory. Sizing of this table depends on num_possible_cpus() and various CONFIG settings. This patch provides some locking abstraction that may ease future work using a different model for the TCP/DCCP table. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Eric Dumazet <dada1@cosmosbay.com> 2007-11-07 02:40:20 -0800
committer: David S. Miller <davem@sunset.davemloft.net> 2007-11-07 04:15:11 -0800
commit: 230140cffa7feae90ad50bf259db1fa07674f3a7 (patch)
tree: 815472add31606423a508a17806b7884f0ab3e2e /net/ipv4/tcp_ipv4.c
parent: efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 (diff)
download: lwn-230140cffa7feae90ad50bf259db1fa07674f3a7.tar.gz
lwn-230140cffa7feae90ad50bf259db1fa07674f3a7.zip
1 files changed, 6 insertions, 5 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e9127cdced20..e566f3c67677 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq)
 		struct sock *sk;
 		struct hlist_node *node;
 		struct inet_timewait_sock *tw;
+		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
-		read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+		read_lock_bh(lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family) {
 				continue;
@@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq)
 			rc = tw;
 			goto out;
 		}
-		read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+		read_unlock_bh(lock);
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 	}
 out:
@@ -2094,11 +2095,11 @@ get_tw:
 			cur = tw;
 			goto out;
 		}
-		read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
 		if (++st->bucket < tcp_hashinfo.ehash_size) {
-			read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+			read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
 		} else {
 			cur = NULL;
@@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 	case TCP_SEQ_STATE_TIME_WAIT:
 	case TCP_SEQ_STATE_ESTABLISHED:
 		if (v)
-			read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		break;
 	}
 }
author	Eric Dumazet <dada1@cosmosbay.com>	2007-11-07 02:40:20 -0800
committer	David S. Miller <davem@sunset.davemloft.net>	2007-11-07 04:15:11 -0800
commit	230140cffa7feae90ad50bf259db1fa07674f3a7 (patch)
tree	815472add31606423a508a17806b7884f0ab3e2e /net/ipv4/tcp_ipv4.c
parent	efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 (diff)
download	lwn-230140cffa7feae90ad50bf259db1fa07674f3a7.tar.gz lwn-230140cffa7feae90ad50bf259db1fa07674f3a7.zip