diff options
author | Jakub Kicinski <kuba@kernel.org> | 2021-11-01 19:57:16 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2021-11-01 19:57:17 -0700 |
commit | 52fa3ee0cce60a04739f4a5ca1c9d5c2a8ee1578 (patch) | |
tree | e51b800c5edc33c5fa13211680581bcc385c10a1 | |
parent | 1d6d336fed6b283a16594345a459b6e3bba3761a (diff) | |
parent | f86ca07eb5310a1bdc7032458c7f76862f5a1552 (diff) | |
download | lwn-52fa3ee0cce60a04739f4a5ca1c9d5c2a8ee1578.tar.gz lwn-52fa3ee0cce60a04739f4a5ca1c9d5c2a8ee1578.zip |
Merge branch 'make-neighbor-eviction-controllable-by-userspace'
James Prestwood says:
====================
Make neighbor eviction controllable by userspace
====================
Link: https://lore.kernel.org/r/20211101173630.300969-1-prestwoj@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | Documentation/networking/ip-sysctl.rst | 18 | ||||
-rw-r--r-- | include/linux/inetdevice.h | 2 | ||||
-rw-r--r-- | include/linux/ipv6.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/ip.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/ipv6.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/sysctl.h | 1 | ||||
-rw-r--r-- | net/ipv4/arp.c | 11 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 4 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 12 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 12 | ||||
-rwxr-xr-x | tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh | 220 |
11 files changed, 281 insertions, 2 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 16b8bf72feaf..c61cc0219f4c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1611,6 +1611,15 @@ arp_accept - BOOLEAN gratuitous arp frame, the arp table will be updated regardless if this setting is on or off. +arp_evict_nocarrier - BOOLEAN + Clears the ARP cache on NOCARRIER events. This option is important for + wireless devices where the ARP cache should not be cleared when roaming + between access points on the same network. In most cases this should + remain as the default (1). + + - 1 - (default): Clear the ARP cache on NOCARRIER events + - 0 - Do not clear ARP cache on NOCARRIER events + mcast_solicit - INTEGER The maximum number of multicast probes in INCOMPLETE state, when the associated hardware address is unknown. Defaults @@ -2341,6 +2350,15 @@ ndisc_tclass - INTEGER * 0 - (default) +ndisc_evict_nocarrier - BOOLEAN + Clears the neighbor discovery table on NOCARRIER events. This option is + important for wireless devices where the neighbor discovery cache should + not be cleared when roaming between access points on the same network. + In most cases this should remain as the default (1). + + - 1 - (default): Clear neighbor discover cache on NOCARRIER events. + - 0 - Do not clear neighbor discovery cache on NOCARRIER events. + mldv1_unsolicited_report_interval - INTEGER The interval in milliseconds in which the next unsolicited MLDv1 report retransmit will take place. diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index a038feb63f23..518b484a7f07 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -133,6 +133,8 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) +#define IN_DEV_ARP_EVICT_NOCARRIER(in_dev) IN_DEV_ANDCONF((in_dev), \ + ARP_EVICT_NOCARRIER) struct in_ifaddr { struct hlist_node hash; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c383630d3f06..20c1f968da7c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -79,6 +79,7 @@ struct ipv6_devconf { __u32 ioam6_id; __u32 ioam6_id_wide; __u8 ioam6_enabled; + __u8 ndisc_evict_nocarrier; struct ctl_table_header *sysctl_header; }; diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index e42d13b55cf3..e00bbb9c47bb 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -169,6 +169,7 @@ enum IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, IPV4_DEVCONF_DROP_GRATUITOUS_ARP, IPV4_DEVCONF_BC_FORWARDING, + IPV4_DEVCONF_ARP_EVICT_NOCARRIER, __IPV4_DEVCONF_MAX }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index b243a53fa985..d4178dace0bf 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -193,6 +193,7 @@ enum { DEVCONF_IOAM6_ENABLED, DEVCONF_IOAM6_ID, DEVCONF_IOAM6_ID_WIDE, + DEVCONF_NDISC_EVICT_NOCARRIER, DEVCONF_MAX }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 1e05d3caa712..6a3b194c50fe 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_PROMOTE_SECONDARIES=20, NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, + NET_IPV4_CONF_ARP_EVICT_NOCARRIER=23, }; /* /proc/sys/net/ipv4/netfilter */ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 922dd73e5740..857a144b1ea9 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1247,6 +1247,8 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_change_info *change_info; + struct in_device *in_dev; + bool evict_nocarrier; switch (event) { case NETDEV_CHANGEADDR: @@ -1257,7 +1259,14 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&arp_tbl, dev); - if (!netif_carrier_ok(dev)) + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + evict_nocarrier = true; + else + evict_nocarrier = IN_DEV_ARP_EVICT_NOCARRIER(in_dev); + + if (evict_nocarrier && !netif_carrier_ok(dev)) neigh_carrier_down(&arp_tbl, dev); break; default: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f4468980b675..ec73a0d52d3e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -75,6 +75,7 @@ static struct ipv4_devconf ipv4_devconf = { [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, + [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1, }, }; @@ -87,6 +88,7 @@ static struct ipv4_devconf ipv4_devconf_dflt = { [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, + [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1, }, }; @@ -2532,6 +2534,8 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), + DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER, + "arp_evict_nocarrier"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, "force_igmp_version"), diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9e1463a2acae..3445f8017430 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -241,6 +241,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ioam6_enabled = 0, .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, + .ndisc_evict_nocarrier = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -300,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ioam6_enabled = 0, .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, + .ndisc_evict_nocarrier = 1, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -5545,6 +5547,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; + array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; } static inline size_t inet6_ifla6_size(void) @@ -6987,6 +6990,15 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_douintvec, }, { + .procname = "ndisc_evict_nocarrier", + .data = &ipv6_devconf.ndisc_evict_nocarrier, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, + { /* sentinel */ } }; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 184190b9ea25..f03b597e4121 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1794,6 +1794,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; + bool evict_nocarrier; switch (event) { case NETDEV_CHANGEADDR: @@ -1810,10 +1811,19 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, in6_dev_put(idev); break; case NETDEV_CHANGE: + idev = in6_dev_get(dev); + if (!idev) + evict_nocarrier = true; + else { + evict_nocarrier = idev->cnf.ndisc_evict_nocarrier && + net->ipv6.devconf_all->ndisc_evict_nocarrier; + in6_dev_put(idev); + } + change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&nd_tbl, dev); - if (!netif_carrier_ok(dev)) + if (evict_nocarrier && !netif_carrier_ok(dev)) neigh_carrier_down(&nd_tbl, dev); break; case NETDEV_DOWN: diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh new file mode 100755 index 000000000000..b5af08af8559 --- /dev/null +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Tests sysctl options {arp,ndisc}_evict_nocarrier={0,1} +# +# Create a veth pair and set IPs/routes on both. Then ping to establish +# an entry in the ARP/ND table. Depending on the test set sysctl option to +# 1 or 0. Set remote veth down which will cause local veth to go into a no +# carrier state. Depending on the test check the ARP/ND table: +# +# {arp,ndisc}_evict_nocarrier=1 should contain no ARP/ND after no carrier +# {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry +# + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +readonly V4_ADDR0=10.0.10.1 +readonly V4_ADDR1=10.0.10.2 +readonly V6_ADDR0=2001:db8:91::1 +readonly V6_ADDR1=2001:db8:91::2 +nsid=100 + +cleanup_v6() +{ + ip netns del me + ip netns del peer + + sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 +} + +create_ns() +{ + local n=${1} + + ip netns del ${n} 2>/dev/null + + ip netns add ${n} + ip netns set ${n} $((nsid++)) + ip -netns ${n} link set lo up +} + + +setup_v6() { + create_ns me + create_ns peer + + IP="ip -netns me" + + $IP li add veth1 type veth peer name veth2 + $IP li set veth1 up + $IP -6 addr add $V6_ADDR0/64 dev veth1 nodad + $IP li set veth2 netns peer up + ip -netns peer -6 addr add $V6_ADDR1/64 dev veth2 nodad + + ip netns exec me sysctl -w $1 >/dev/null 2>&1 + + # Establish an ND cache entry + ip netns exec me ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1 + # Should have the veth1 entry in ND table + ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + if [ $? -ne 0 ]; then + cleanup_v6 + echo "failed" + exit + fi + + # Set veth2 down, which will put veth1 in NOCARRIER state + ip netns exec peer ip link set veth2 down +} + +setup_v4() { + ip netns add "${PEER_NS}" + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 up + ip link set dev veth1 netns "${PEER_NS}" + ip netns exec "${PEER_NS}" ip link set dev veth1 up + ip addr add $V4_ADDR0/24 dev veth0 + ip netns exec "${PEER_NS}" ip addr add $V4_ADDR1/24 dev veth1 + ip netns exec ${PEER_NS} ip route add default via $V4_ADDR1 dev veth1 + ip route add default via $V4_ADDR0 dev veth0 + + sysctl -w "$1" >/dev/null 2>&1 + + # Establish an ARP cache entry + ping -c1 -I veth0 $V4_ADDR1 -q >/dev/null 2>&1 + # Should have the veth1 entry in ARP table + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + if [ $? -ne 0 ]; then + cleanup_v4 + echo "failed" + exit + fi + + # Set veth1 down, which will put veth0 in NOCARRIER state + ip netns exec "${PEER_NS}" ip link set veth1 down +} + +cleanup_v4() { + ip neigh flush dev veth0 + ip link del veth0 + local -r ns="$(ip netns list|grep $PEER_NS)" + [ -n "$ns" ] && ip netns del $ns 2>/dev/null + + sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1 +} + +# Run test when arp_evict_nocarrier = 1 (default). +run_arp_evict_nocarrier_enabled() { + echo "run arp_evict_nocarrier=1 test" + setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=1" + + # ARP table should be empty + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "failed" + else + echo "ok" + fi + + cleanup_v4 +} + +# Run test when arp_evict_nocarrier = 0 +run_arp_evict_nocarrier_disabled() { + echo "run arp_evict_nocarrier=0 test" + setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=0" + + # ARP table should still contain the entry + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + fi + + cleanup_v4 +} + +run_arp_evict_nocarrier_disabled_all() { + echo "run all.arp_evict_nocarrier=0 test" + setup_v4 "net.ipv4.conf.all.arp_evict_nocarrier=0" + + # ARP table should still contain the entry + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + fi + + cleanup_v4 +} + +run_ndisc_evict_nocarrier_enabled() { + echo "run ndisc_evict_nocarrier=1 test" + + setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1" + + ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "failed" + else + echo "ok" + fi + + cleanup_v6 +} + +run_ndisc_evict_nocarrier_disabled() { + echo "run ndisc_evict_nocarrier=0 test" + + setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0" + + ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + fi + + cleanup_v6 +} + +run_ndisc_evict_nocarrier_disabled_all() { + echo "run all.ndisc_evict_nocarrier=0 test" + + setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0" + + ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + fi + + cleanup_v6 +} + +run_all_tests() { + run_arp_evict_nocarrier_enabled + run_arp_evict_nocarrier_disabled + run_arp_evict_nocarrier_disabled_all + run_ndisc_evict_nocarrier_enabled + run_ndisc_evict_nocarrier_disabled + run_ndisc_evict_nocarrier_disabled_all +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +run_all_tests |