diff options
author | Jakub Kicinski <kuba@kernel.org> | 2024-02-28 19:36:41 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-02-28 19:36:42 -0800 |
commit | 3cbab89268c60eb38a0371117fe5b258b1a22dbb (patch) | |
tree | df97cdf2c48cada009cc49dbb434c923a202f068 /net | |
parent | e83ddcea65491efe29e12765996966509aa795d3 (diff) | |
parent | 167487070d644a285ed863516c80b3c35ec929d6 (diff) | |
download | lwn-3cbab89268c60eb38a0371117fe5b258b1a22dbb.tar.gz lwn-3cbab89268c60eb38a0371117fe5b258b1a22dbb.zip |
Merge branch 'inet-implement-lockless-rtm_getnetconf-ops'
Eric Dumazet says:
====================
inet: implement lockless RTM_GETNETCONF ops
This series removes RTNL use for RTM_GETNETCONF operations on AF_INET.
- Annotate data-races to avoid possible KCSAN splats.
- "ip -4 netconf show dev XXX" can be implemented without RTNL [1]
- "ip -4 netconf" dumps can be implemented using RCU instead of RTNL [1]
[1] This only refers to RTM_GETNETCONF operation, "ip" command
also uses RTM_GETLINK dumps which are using RTNL at this moment.
====================
Link: https://lore.kernel.org/r/20240227092411.2315725-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/devinet.c | 147 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 4 | ||||
-rw-r--r-- | net/ipv4/proc.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 4 |
4 files changed, 73 insertions, 84 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bc74f131fe4d..af741af61830 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1982,7 +1982,7 @@ static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev, return -EMSGSIZE; for (i = 0; i < IPV4_DEVCONF_MAX; i++) - ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; + ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]); return 0; } @@ -2068,9 +2068,9 @@ static int inet_netconf_msgsize_devconf(int type) } static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, - struct ipv4_devconf *devconf, u32 portid, - u32 seq, int event, unsigned int flags, - int type) + const struct ipv4_devconf *devconf, + u32 portid, u32 seq, int event, + unsigned int flags, int type) { struct nlmsghdr *nlh; struct netconfmsg *ncm; @@ -2095,27 +2095,28 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, - IPV4_DEVCONF(*devconf, FORWARDING)) < 0) + IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_RP_FILTER) && nla_put_s32(skb, NETCONFA_RP_FILTER, - IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) + IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, - IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) + IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_BC_FORWARDING) && nla_put_s32(skb, NETCONFA_BC_FORWARDING, - IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0) + IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_PROXY_NEIGH) && nla_put_s32(skb, NETCONFA_PROXY_NEIGH, - IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) + IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, - IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) + IPV4_DEVCONF_RO(*devconf, + IGNORE_ROUTES_WITH_LINKDOWN)) < 0) goto nla_put_failure; out: @@ -2204,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); - struct nlattr *tb[NETCONFA_MAX+1]; + struct nlattr *tb[NETCONFA_MAX + 1]; + const struct ipv4_devconf *devconf; + struct in_device *in_dev = NULL; + struct net_device *dev = NULL; struct sk_buff *skb; - struct ipv4_devconf *devconf; - struct in_device *in_dev; - struct net_device *dev; int ifindex; int err; err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err) - goto errout; + return err; - err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) - goto errout; + return -EINVAL; ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); switch (ifindex) { @@ -2229,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, devconf = net->ipv4.devconf_dflt; break; default: - dev = __dev_get_by_index(net, ifindex); - if (!dev) - goto errout; - in_dev = __in_dev_get_rtnl(dev); + err = -ENODEV; + dev = dev_get_by_index(net, ifindex); + if (dev) + in_dev = in_dev_get(dev); if (!in_dev) goto errout; devconf = &in_dev->cnf; @@ -2256,6 +2256,9 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, } err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: + if (in_dev) + in_dev_put(in_dev); + dev_put(dev); return err; } @@ -2264,11 +2267,13 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - int h, s_h; - int idx, s_idx; + struct { + unsigned long ifindex; + unsigned int all_default; + } *ctx = (void *)cb->ctx; + const struct in_device *in_dev; struct net_device *dev; - struct in_device *in_dev; - struct hlist_head *head; + int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; @@ -2285,64 +2290,47 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, } } - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - rcu_read_lock(); - cb->seq = inet_base_seq(net); - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - in_dev = __in_dev_get_rcu(dev); - if (!in_dev) - goto cont; - - if (inet_netconf_fill_devconf(skb, dev->ifindex, - &in_dev->cnf, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, - NLM_F_MULTI, - NETCONFA_ALL) < 0) { - rcu_read_unlock(); - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - rcu_read_unlock(); + rcu_read_lock(); + for_each_netdev_dump(net, dev, ctx->ifindex) { + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + continue; + err = inet_netconf_fill_devconf(skb, dev->ifindex, + &in_dev->cnf, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) + goto done; } - if (h == NETDEV_HASHENTRIES) { - if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, - net->ipv4.devconf_all, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + if (ctx->all_default == 0) { + err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; - } - if (h == NETDEV_HASHENTRIES + 1) { - if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, - net->ipv4.devconf_dflt, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + ctx->all_default++; + } + if (ctx->all_default == 1) { + err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; + ctx->all_default++; } done: - cb->args[0] = h; - cb->args[1] = idx; - - return skb->len; + if (err < 0 && likely(skb->len)) + err = skb->len; + rcu_read_unlock(); + return err; } #ifdef CONFIG_SYSCTL @@ -2825,5 +2813,6 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, - inet_netconf_dump_devconf, 0); + inet_netconf_dump_devconf, + RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index efeeca2b1328..717e97a389a8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -120,12 +120,12 @@ */ #define IGMP_V1_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ + (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ ((in_dev)->mr_v1_seen && \ time_before(jiffies, (in_dev)->mr_v1_seen))) #define IGMP_V2_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ + (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ ((in_dev)->mr_v2_seen && \ time_before(jiffies, (in_dev)->mr_v2_seen))) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 5f4654ebff48..914bc9c35cc7 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -395,7 +395,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", - IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, + IPV4_DEVCONF_ALL_RO(net, FORWARDING) ? 1 : 2, READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b512288d6fcc..c8f76f56dc16 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2313,7 +2313,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (IN_DEV_BFORWARD(in_dev)) goto make_route; /* not do cache if bc_forwarding is enabled */ - if (IPV4_DEVCONF_ALL(net, BC_FORWARDING)) + if (IPV4_DEVCONF_ALL_RO(net, BC_FORWARDING)) do_cache = false; goto brd_input; } @@ -2993,7 +2993,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, #ifdef CONFIG_IP_MROUTE if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && - IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { + IPV4_DEVCONF_ALL_RO(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, r, portid); |