diff options
author | Joanne Koong <joannelkoong@gmail.com> | 2022-05-19 17:18:33 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-05-20 18:16:24 -0700 |
commit | d5a42de8bdbe25081f07b801d8b35f4d75a791f4 (patch) | |
tree | 7bc53e45ea3628a75c25931f9f1d5c7f64d63cd5 /include/net/sock.h | |
parent | eac67d83bf2553f98cfddd42cfbcfd6f9ccfc287 (diff) | |
download | lwn-d5a42de8bdbe25081f07b801d8b35f4d75a791f4.tar.gz lwn-d5a42de8bdbe25081f07b801d8b35f4d75a791f4.zip |
net: Add a second bind table hashed by port and address
We currently have one tcp bind table (bhash) which hashes by port
number only. In the socket bind path, we check for bind conflicts by
traversing the specified port's inet_bind2_bucket while holding the
bucket's spinlock (see inet_csk_get_port() and inet_csk_bind_conflict()).
In instances where there are tons of sockets hashed to the same port
at different addresses, checking for a bind conflict is time-intensive
and can cause softirq cpu lockups, as well as stops new tcp connections
since __inet_inherit_port() also contests for the spinlock.
This patch proposes adding a second bind table, bhash2, that hashes by
port and ip address. Searching the bhash2 table leads to significantly
faster conflict resolution and less time holding the spinlock.
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- | include/net/sock.h | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 72ca97ccb460..c585ef6565d9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -348,6 +348,7 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference + * @sk_bind2_node: bind node in the bhash2 table */ struct sock { /* @@ -537,6 +538,7 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; + struct hlist_node sk_bind2_node; }; enum sk_pacing { @@ -817,6 +819,16 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } +static inline void __sk_del_bind2_node(struct sock *sk) +{ + __hlist_del(&sk->sk_bind2_node); +} + +static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) +{ + hlist_add_head(&sk->sk_bind2_node, list); +} + #define sk_for_each(__sk, list) \ hlist_for_each_entry(__sk, list, sk_node) #define sk_for_each_rcu(__sk, list) \ @@ -834,6 +846,8 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) +#define sk_for_each_bound_bhash2(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_bind2_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset |