diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2007-11-07 02:40:20 -0800 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-11-07 04:15:11 -0800 |
commit | 230140cffa7feae90ad50bf259db1fa07674f3a7 (patch) | |
tree | 815472add31606423a508a17806b7884f0ab3e2e /include/net/inet_hashtables.h | |
parent | efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 (diff) | |
download | lwn-230140cffa7feae90ad50bf259db1fa07674f3a7.tar.gz lwn-230140cffa7feae90ad50bf259db1fa07674f3a7.zip |
[INET]: Remove per bucket rwlock in tcp/dccp ehash table.
As done two years ago on the IP route cache table (commit
22c047ccbc68fa8f3fa57f0e8f906479a062c426), we can avoid using one
lock per hash bucket for the huge TCP/DCCP hash tables.
On a typical x86_64 platform, this saves about 2MB or 4MB of ram, for
little performance difference. (we hit a different cache line for the
rwlock, but then the bucket cache line has a better sharing factor
among cpus, since we dirty it less often). For netstat or ss commands
that want a full scan of hash table, we perform fewer memory accesses.
Using a 'small' table of hashed rwlocks should be more than enough to
provide correct SMP concurrency between different buckets, without
using too much memory. Sizing of this table depends on
num_possible_cpus() and various CONFIG settings.
This patch provides some locking abstraction that may ease future
work using a different model for the TCP/DCCP table.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/inet_hashtables.h')
-rw-r--r-- | include/net/inet_hashtables.h | 71 |
1 files changed, 65 insertions, 6 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 4427dcd1e53a..8461cda37490 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -37,7 +37,6 @@ * I'll experiment with dynamic table growth later. */ struct inet_ehash_bucket { - rwlock_t lock; struct hlist_head chain; struct hlist_head twchain; }; @@ -100,6 +99,9 @@ struct inet_hashinfo { * TIME_WAIT sockets use a separate chain (twchain). */ struct inet_ehash_bucket *ehash; + rwlock_t *ehash_locks; + unsigned int ehash_size; + unsigned int ehash_locks_mask; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. @@ -107,7 +109,7 @@ struct inet_hashinfo { struct inet_bind_hashbucket *bhash; unsigned int bhash_size; - unsigned int ehash_size; + /* Note : 4 bytes padding on 64 bit arches */ /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. Hash function here @@ -134,6 +136,62 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; } +static inline rwlock_t *inet_ehash_lockp( + struct inet_hashinfo *hashinfo, + unsigned int hash) +{ + return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask]; +} + +static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) +{ + unsigned int i, size = 256; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + if (nr_pcpus >= 4) + size = 512; + if (nr_pcpus >= 8) + size = 1024; + if (nr_pcpus >= 16) + size = 2048; + if (nr_pcpus >= 32) + size = 4096; + if (sizeof(rwlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(rwlock_t) > PAGE_SIZE) + hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); + else +#endif + hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), + GFP_KERNEL); + if (!hashinfo->ehash_locks) + return ENOMEM; + for (i = 
0; i < size; i++) + rwlock_init(&hashinfo->ehash_locks[i]); + } + hashinfo->ehash_locks_mask = size - 1; + return 0; +} + +static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) +{ + if (hashinfo->ehash_locks) { +#ifdef CONFIG_NUMA + unsigned int size = (hashinfo->ehash_locks_mask + 1) * + sizeof(rwlock_t); + if (size > PAGE_SIZE) + vfree(hashinfo->ehash_locks); + else +#else + kfree(hashinfo->ehash_locks); +#endif + hashinfo->ehash_locks = NULL; + } +} + extern struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, @@ -222,7 +280,7 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo, sk->sk_hash = inet_sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); list = &head->chain; - lock = &head->lock; + lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock(lock); } __sk_add_node(sk, list); @@ -253,7 +311,7 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) inet_listen_wlock(hashinfo); lock = &hashinfo->lhash_lock; } else { - lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock; + lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock_bh(lock); } @@ -354,9 +412,10 @@ static inline struct sock * */ unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); prefetch(head->chain.first); - read_lock(&head->lock); + read_lock(lock); sk_for_each(sk, node, &head->chain) { if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ @@ -369,7 +428,7 @@ static inline struct sock * } sk = NULL; out: - read_unlock(&head->lock); + read_unlock(lock); return sk; hit: sock_hold(sk); |