diff options
author | Jiri Pirko <jiri@nvidia.com> | 2023-05-10 16:46:21 +0200 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-05-11 18:06:50 -0700 |
commit | e93c9378e33f68b61ea9318580d841caa22fb9ea (patch) | |
tree | 9af5b88c5af487772cd6d860fac3367b65bba583 | |
parent | 7ce93d6f91d8871a609f55bce91a507c392805e6 (diff) | |
download | lwn-e93c9378e33f68b61ea9318580d841caa22fb9ea.tar.gz lwn-e93c9378e33f68b61ea9318580d841caa22fb9ea.zip |
devlink: change per-devlink netdev notifier to static one
The commit 565b4824c39f ("devlink: change port event netdev notifier
from per-net to global") changed original per-net notifier to be
per-devlink instance. That fixed the issue of non-receiving events
of netdev uninit if that moved to a different namespace.
That worked fine in -net tree.
However, later on when commit ee75f1fc44dd ("net/mlx5e: Create
separate devlink instance for ethernet auxiliary device") and
commit 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in
case of PCI device suspend") were merged, a deadlock was introduced
when removing a namespace with devlink instance with another nested
instance.
Here there is the bad flow example resulting in deadlock with mlx5:
net_cleanup_work -> cleanup_net (takes down_read(&pernet_ops_rwsem) ->
devlink_pernet_pre_exit() -> devlink_reload() ->
mlx5_devlink_reload_down() -> mlx5_unload_one_devl_locked() ->
mlx5_detach_device() -> del_adev() -> mlx5e_remove() ->
mlx5e_destroy_devlink() -> devlink_free() ->
unregister_netdevice_notifier() (takes down_write(&pernet_ops_rwsem)
Steps to reproduce:
$ modprobe mlx5_core
$ ip netns add ns1
$ devlink dev reload pci/0000:08:00.0 netns ns1
$ ip netns del ns1
Resolve this by converting the notifier from per-devlink instance to
a static one registered during init phase and leaving it registered
forever. Use this notifier for all devlink port instances created
later on.
Note what a tree needs this fix only in case all of the cited fixes
commits are present.
Reported-by: Moshe Shemesh <moshe@nvidia.com>
Fixes: 565b4824c39f ("devlink: change port event netdev notifier from per-net to global")
Fixes: ee75f1fc44dd ("net/mlx5e: Create separate devlink instance for ethernet auxiliary device")
Fixes: 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in case of PCI device suspend")
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230510144621.932017-1-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | net/devlink/core.c | 16 | ||||
-rw-r--r-- | net/devlink/devl_internal.h | 1 | ||||
-rw-r--r-- | net/devlink/leftover.c | 5 |
3 files changed, 9 insertions, 13 deletions
diff --git a/net/devlink/core.c b/net/devlink/core.c index 777b091ef74d..0e58eee44bdb 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -204,11 +204,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, if (ret < 0) goto err_xa_alloc; - devlink->netdevice_nb.notifier_call = devlink_port_netdevice_event; - ret = register_netdevice_notifier(&devlink->netdevice_nb); - if (ret) - goto err_register_netdevice_notifier; - devlink->dev = dev; devlink->ops = ops; xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); @@ -233,8 +228,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, return devlink; -err_register_netdevice_notifier: - xa_erase(&devlinks, devlink->index); err_xa_alloc: kfree(devlink); return NULL; @@ -266,8 +259,6 @@ void devlink_free(struct devlink *devlink) xa_destroy(&devlink->params); xa_destroy(&devlink->ports); - WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb)); - xa_erase(&devlinks, devlink->index); devlink_put(devlink); @@ -303,6 +294,10 @@ static struct pernet_operations devlink_pernet_ops __net_initdata = { .pre_exit = devlink_pernet_pre_exit, }; +static struct notifier_block devlink_port_netdevice_nb __net_initdata = { + .notifier_call = devlink_port_netdevice_event, +}; + static int __init devlink_init(void) { int err; @@ -311,6 +306,9 @@ static int __init devlink_init(void) if (err) goto out; err = register_pernet_subsys(&devlink_pernet_ops); + if (err) + goto out; + err = register_netdevice_notifier(&devlink_port_netdevice_nb); out: WARN_ON(err); diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index e133f423294a..62921b2eb0d3 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -50,7 +50,6 @@ struct devlink { u8 reload_failed:1; refcount_t refcount; struct rcu_work rwork; - struct notifier_block netdevice_nb; char priv[] __aligned(NETDEV_ALIGN); }; diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index dffca2f9bfa7..cd0254968076 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -7073,10 +7073,9 @@ int devlink_port_netdevice_event(struct notifier_block *nb, struct devlink_port *devlink_port = netdev->devlink_port; struct devlink *devlink; - devlink = container_of(nb, struct devlink, netdevice_nb); - - if (!devlink_port || devlink_port->devlink != devlink) + if (!devlink_port) return NOTIFY_OK; + devlink = devlink_port->devlink; switch (event) { case NETDEV_POST_INIT: |