summaryrefslogtreecommitdiff
path: root/net/core/net_namespace.c
diff options
context:
space:
mode:
authorKirill Tkhai <ktkhai@virtuozzo.com>2018-03-29 19:20:32 +0300
committerDavid S. Miller <davem@davemloft.net>2018-03-29 13:47:53 -0400
commitf0b07bb151b098d291fd1fd71ef7a2df56fb124a (patch)
tree24f28ec5ec61e4b0950fef35da79853357a34afb /net/core/net_namespace.c
parent906edee91e79af5a348f1ad1b3f9b4b948db3db7 (diff)
downloadlwn-f0b07bb151b098d291fd1fd71ef7a2df56fb124a.tar.gz
lwn-f0b07bb151b098d291fd1fd71ef7a2df56fb124a.zip
net: Introduce net_rwsem to protect net_namespace_list
rtnl_lock() is used everywhere, and contention is very high. When someone wants to iterate over alive net namespaces, he/she has no a possibility to do that without exclusive lock. But the exclusive rtnl_lock() in such places is overkill, and it just increases the contention. Yes, there is already for_each_net_rcu() in kernel, but it requires rcu_read_lock(), and this can't be sleepable. Also, sometimes it may be need really prevent net_namespace_list growth, so for_each_net_rcu() is not fit there. This patch introduces new rw_semaphore, which will be used instead of rtnl_mutex to protect net_namespace_list. It is sleepable and allows not-exclusive iterations over net namespaces list. It allows to stop using rtnl_lock() in several places (what is made in next patches) and makes less the time, we keep rtnl_mutex. Here we just add new lock, while the explanation of we can remove rtnl_lock() there are in next patches. Fine grained locks generally are better, then one big lock, so let's do that with net_namespace_list, while the situation allows that. Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/net_namespace.c')
-rw-r--r--net/core/net_namespace.c18
1 files changed, 13 insertions, 5 deletions
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b5796d17a302..7fdf321d4997 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list;
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
+/* Protects net_namespace_list. Nests iside rtnl_lock() */
+DECLARE_RWSEM(net_rwsem);
+EXPORT_SYMBOL_GPL(net_rwsem);
+
struct net init_net = {
.count = REFCOUNT_INIT(1),
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
@@ -309,9 +313,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
if (error < 0)
goto out_undo;
}
- rtnl_lock();
+ down_write(&net_rwsem);
list_add_tail_rcu(&net->list, &net_namespace_list);
- rtnl_unlock();
+ up_write(&net_rwsem);
out:
return error;
@@ -450,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last)
* and this work is the only process, that may delete
* a net from net_namespace_list. So, when the below
* is executing, the list may only grow. Thus, we do not
- * use for_each_net_rcu() or rtnl_lock().
+ * use for_each_net_rcu() or net_rwsem.
*/
for_each_net(tmp) {
int id;
@@ -485,7 +489,7 @@ static void cleanup_net(struct work_struct *work)
down_read(&pernet_ops_rwsem);
/* Don't let anyone else find us. */
- rtnl_lock();
+ down_write(&net_rwsem);
llist_for_each_entry(net, net_kill_list, cleanup_list)
list_del_rcu(&net->list);
/* Cache last net. After we unlock rtnl, no one new net
@@ -499,7 +503,7 @@ static void cleanup_net(struct work_struct *work)
* useless anyway, as netns_ids are destroyed there.
*/
last = list_last_entry(&net_namespace_list, struct net, list);
- rtnl_unlock();
+ up_write(&net_rwsem);
llist_for_each_entry(net, net_kill_list, cleanup_list) {
unhash_nsid(net, last);
@@ -900,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list,
list_add_tail(&ops->list, list);
if (ops->init || (ops->id && ops->size)) {
+ /* We held write locked pernet_ops_rwsem, and parallel
+ * setup_net() and cleanup_net() are not possible.
+ */
for_each_net(net) {
error = ops_init(ops, net);
if (error)
@@ -923,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
LIST_HEAD(net_exit_list);
list_del(&ops->list);
+ /* See comment in __register_pernet_operations() */
for_each_net(net)
list_add_tail(&net->exit_list, &net_exit_list);
ops_exit_list(ops, &net_exit_list);