summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-03-12 17:44:10 -0800
committerDavid S. Miller <davem@davemloft.net>2021-03-12 17:44:10 -0800
commite9e90a70cc2d5690b84c7ca2f604e09a85222bb1 (patch)
treea55ccc44034dadb11e3750a54d990f0eb7f961cd
parentb202923d3a93296c2e2627eee0222dfef3606777 (diff)
parentb8a07c4cea04c14008f34880424852d38ae03758 (diff)
downloadlwn-e9e90a70cc2d5690b84c7ca2f604e09a85222bb1.tar.gz
lwn-e9e90a70cc2d5690b84c7ca2f604e09a85222bb1.zip
Merge branch 'resil-nhgroups-netdevsim-selftests'
Petr Machata says: ==================== net: Resilient NH groups: netdevsim, selftests Support for resilient next-hop groups was added in a previous patch set. Resilient next hop groups add a layer of indirection between the SKB hash and the next hop. Thus the hash is used to reference a hash table bucket, which is then used to reference a particular next hop. This allows the system more flexibility when assigning SKB hash space to next hops. Previously, each next hop had to be assigned a continuous range of SKB hash space. With a hash table as an intermediate layer, it is possible to reassign next hops with a hash table bucket granularity. In turn, this mends issues with traffic flow redirection resulting from next hop removal or adjustments in next-hop weights. This patch set introduces mock offloading of resilient next hop groups by the netdevsim driver, and a suite of selftests. - Patch #1 adds a netdevsim-specific lock to protect next-hop hashtable. Previously, netdevsim relied on RTNL to maintain mutual exclusion. Patch #2 extracts a helper to make the following patches clearer. - Patch #3 implements the support for offloading of resilient next-hop groups. - Patch #4 introduces a new debugfs interface to set activity on a selected next-hop bucket. This simulates how HW can periodically report bucket activity, and buckets thus marked are expected to be exempt from migration to new next hops when the group changes. - Patches #5 and #6 clean up the fib_nexthop selftests. - Patches #7, #8 and #9 add tests for resilient next hop groups. Patch #7 adds resilient-hashing counterparts to fib_nexthops.sh. Patch #8 adds a new traffic test for resilient next-hop groups. Patch #9 adds a new traffic test for tunneling. - Patch #10 actually leverages the netdevsim offload to implement a suite of algorithmic tests that verify how and when buckets are migrated under various simulated workload scenarios. The overall plan is to contribute approximately the following patchsets: 1) Nexthop policy refactoring (already pushed) 2) Preparations for resilient next hop groups (already pushed) 3) Implementation of resilient next hop group (already pushed) 4) Netdevsim offload plus a suite of selftests (this patchset) 5) Preparations for mlxsw offload of resilient next-hop groups 6) mlxsw offload including selftests Interested parties can look at the complete code at [2]. [1] https://tools.ietf.org/html/rfc2992 [2] https://github.com/idosch/linux/commits/submit/res_integ_v1 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/netdevsim/fib.c139
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh620
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh549
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh361
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh400
5 files changed, 2059 insertions, 10 deletions
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 3ca0f54d0c3b..fda6f37e7055 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -14,6 +14,7 @@
* THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
*/
+#include <linux/bitmap.h>
#include <linux/in6.h>
#include <linux/kernel.h>
#include <linux/list.h>
@@ -47,15 +48,18 @@ struct nsim_fib_data {
struct nsim_fib_entry nexthops;
struct rhashtable fib_rt_ht;
struct list_head fib_rt_list;
- struct mutex fib_lock; /* Protects hashtable and list */
+ struct mutex fib_lock; /* Protects FIB HT and list */
struct notifier_block nexthop_nb;
struct rhashtable nexthop_ht;
struct devlink *devlink;
struct work_struct fib_event_work;
struct list_head fib_event_queue;
spinlock_t fib_event_queue_lock; /* Protects fib event queue list */
+ struct mutex nh_lock; /* Protects NH HT */
struct dentry *ddir;
bool fail_route_offload;
+ bool fail_res_nexthop_group_replace;
+ bool fail_nexthop_bucket_replace;
};
struct nsim_fib_rt_key {
@@ -116,6 +120,7 @@ struct nsim_nexthop {
struct rhash_head ht_node;
u64 occ;
u32 id;
+ bool is_resilient;
};
static const struct rhashtable_params nsim_nexthop_ht_params = {
@@ -1114,6 +1119,10 @@ static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data,
for (i = 0; i < info->nh_grp->num_nh; i++)
occ += info->nh_grp->nh_entries[i].weight;
break;
+ case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
+ occ = info->nh_res_table->num_nh_buckets;
+ nexthop->is_resilient = true;
+ break;
default:
NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
kfree(nexthop);
@@ -1156,6 +1165,21 @@ err_num_decrease:
}
+static void nsim_nexthop_hw_flags_set(struct net *net,
+ const struct nsim_nexthop *nexthop,
+ bool trap)
+{
+ int i;
+
+ nexthop_set_hw_flags(net, nexthop->id, false, trap);
+
+ if (!nexthop->is_resilient)
+ return;
+
+ for (i = 0; i < nexthop->occ; i++)
+ nexthop_bucket_set_hw_flags(net, nexthop->id, i, false, trap);
+}
+
static int nsim_nexthop_add(struct nsim_fib_data *data,
struct nsim_nexthop *nexthop,
struct netlink_ext_ack *extack)
@@ -1174,7 +1198,7 @@ static int nsim_nexthop_add(struct nsim_fib_data *data,
goto err_nexthop_dismiss;
}
- nexthop_set_hw_flags(net, nexthop->id, false, true);
+ nsim_nexthop_hw_flags_set(net, nexthop, true);
return 0;
@@ -1203,7 +1227,7 @@ static int nsim_nexthop_replace(struct nsim_fib_data *data,
goto err_nexthop_dismiss;
}
- nexthop_set_hw_flags(net, nexthop->id, false, true);
+ nsim_nexthop_hw_flags_set(net, nexthop, true);
nsim_nexthop_account(data, nexthop_old->occ, false, extack);
nsim_nexthop_destroy(nexthop_old);
@@ -1254,6 +1278,32 @@ static void nsim_nexthop_remove(struct nsim_fib_data *data,
nsim_nexthop_destroy(nexthop);
}
+static int nsim_nexthop_res_table_pre_replace(struct nsim_fib_data *data,
+ struct nh_notifier_info *info)
+{
+ if (data->fail_res_nexthop_group_replace) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace a resilient nexthop group");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nsim_nexthop_bucket_replace(struct nsim_fib_data *data,
+ struct nh_notifier_info *info)
+{
+ if (data->fail_nexthop_bucket_replace) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace nexthop bucket");
+ return -EINVAL;
+ }
+
+ nexthop_bucket_set_hw_flags(info->net, info->id,
+ info->nh_res_bucket->bucket_index,
+ false, true);
+
+ return 0;
+}
+
static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
void *ptr)
{
@@ -1262,8 +1312,7 @@ static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
struct nh_notifier_info *info = ptr;
int err = 0;
- ASSERT_RTNL();
-
+ mutex_lock(&data->nh_lock);
switch (event) {
case NEXTHOP_EVENT_REPLACE:
err = nsim_nexthop_insert(data, info);
@@ -1271,10 +1320,17 @@ static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
case NEXTHOP_EVENT_DEL:
nsim_nexthop_remove(data, info);
break;
+ case NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE:
+ err = nsim_nexthop_res_table_pre_replace(data, info);
+ break;
+ case NEXTHOP_EVENT_BUCKET_REPLACE:
+ err = nsim_nexthop_bucket_replace(data, info);
+ break;
default:
break;
}
+ mutex_unlock(&data->nh_lock);
return notifier_from_errno(err);
}
@@ -1285,11 +1341,68 @@ static void nsim_nexthop_free(void *ptr, void *arg)
struct net *net;
net = devlink_net(data->devlink);
- nexthop_set_hw_flags(net, nexthop->id, false, false);
+ nsim_nexthop_hw_flags_set(net, nexthop, false);
nsim_nexthop_account(data, nexthop->occ, false, NULL);
nsim_nexthop_destroy(nexthop);
}
+static ssize_t nsim_nexthop_bucket_activity_write(struct file *file,
+ const char __user *user_buf,
+ size_t size, loff_t *ppos)
+{
+ struct nsim_fib_data *data = file->private_data;
+ struct net *net = devlink_net(data->devlink);
+ struct nsim_nexthop *nexthop;
+ unsigned long *activity;
+ loff_t pos = *ppos;
+ u16 bucket_index;
+ char buf[128];
+ int err = 0;
+ u32 nhid;
+
+ if (pos != 0)
+ return -EINVAL;
+ if (size > sizeof(buf))
+ return -EINVAL;
+ if (copy_from_user(buf, user_buf, size))
+ return -EFAULT;
+ if (sscanf(buf, "%u %hu", &nhid, &bucket_index) != 2)
+ return -EINVAL;
+
+ rtnl_lock();
+
+ nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &nhid,
+ nsim_nexthop_ht_params);
+ if (!nexthop || !nexthop->is_resilient ||
+ bucket_index >= nexthop->occ) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ activity = bitmap_zalloc(nexthop->occ, GFP_KERNEL);
+ if (!activity) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ bitmap_set(activity, bucket_index, 1);
+ nexthop_res_grp_activity_update(net, nhid, nexthop->occ, activity);
+ bitmap_free(activity);
+
+out:
+ rtnl_unlock();
+
+ *ppos = size;
+ return err ?: size;
+}
+
+static const struct file_operations nsim_nexthop_bucket_activity_fops = {
+ .open = simple_open,
+ .write = nsim_nexthop_bucket_activity_write,
+ .llseek = no_llseek,
+ .owner = THIS_MODULE,
+};
+
static u64 nsim_fib_ipv4_resource_occ_get(void *priv)
{
struct nsim_fib_data *data = priv;
@@ -1379,6 +1492,17 @@ nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev)
data->fail_route_offload = false;
debugfs_create_bool("fail_route_offload", 0600, data->ddir,
&data->fail_route_offload);
+
+ data->fail_res_nexthop_group_replace = false;
+ debugfs_create_bool("fail_res_nexthop_group_replace", 0600, data->ddir,
+ &data->fail_res_nexthop_group_replace);
+
+ data->fail_nexthop_bucket_replace = false;
+ debugfs_create_bool("fail_nexthop_bucket_replace", 0600, data->ddir,
+ &data->fail_nexthop_bucket_replace);
+
+ debugfs_create_file("nexthop_bucket_activity", 0200, data->ddir,
+ data, &nsim_nexthop_bucket_activity_fops);
return 0;
}
@@ -1404,6 +1528,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
if (err)
goto err_data_free;
+ mutex_init(&data->nh_lock);
err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params);
if (err)
goto err_debugfs_exit;
@@ -1469,6 +1594,7 @@ err_rhashtable_nexthop_destroy:
data);
mutex_destroy(&data->fib_lock);
err_debugfs_exit:
+ mutex_destroy(&data->nh_lock);
nsim_fib_debugfs_exit(data);
err_data_free:
kfree(data);
@@ -1497,6 +1623,7 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
WARN_ON_ONCE(!list_empty(&data->fib_event_queue));
WARN_ON_ONCE(!list_empty(&data->fib_rt_list));
mutex_destroy(&data->fib_lock);
+ mutex_destroy(&data->nh_lock);
nsim_fib_debugfs_exit(data);
kfree(data);
}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index be0c1b5ee6b8..ba75c81cda91 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -11,14 +11,33 @@ ALL_TESTS="
nexthop_single_add_err_test
nexthop_group_add_test
nexthop_group_add_err_test
+ nexthop_res_group_add_test
+ nexthop_res_group_add_err_test
nexthop_group_replace_test
nexthop_group_replace_err_test
+ nexthop_res_group_replace_test
+ nexthop_res_group_replace_err_test
+ nexthop_res_group_idle_timer_test
+ nexthop_res_group_idle_timer_del_test
+ nexthop_res_group_increase_idle_timer_test
+ nexthop_res_group_decrease_idle_timer_test
+ nexthop_res_group_unbalanced_timer_test
+ nexthop_res_group_unbalanced_timer_del_test
+ nexthop_res_group_no_unbalanced_timer_test
+ nexthop_res_group_short_unbalanced_timer_test
+ nexthop_res_group_increase_unbalanced_timer_test
+ nexthop_res_group_decrease_unbalanced_timer_test
+ nexthop_res_group_force_migrate_busy_test
nexthop_single_replace_test
nexthop_single_replace_err_test
nexthop_single_in_group_replace_test
nexthop_single_in_group_replace_err_test
+ nexthop_single_in_res_group_replace_test
+ nexthop_single_in_res_group_replace_err_test
nexthop_single_in_group_delete_test
nexthop_single_in_group_delete_err_test
+ nexthop_single_in_res_group_delete_test
+ nexthop_single_in_res_group_delete_err_test
nexthop_replay_test
nexthop_replay_err_test
"
@@ -27,6 +46,7 @@ DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
@@ -44,6 +64,28 @@ nexthop_check()
return 0
}
+nexthop_bucket_nhid_count_check()
+{
+ local group_id=$1; shift
+ local expected
+ local count
+ local nhid
+ local ret
+
+ while (($# > 0)); do
+ nhid=$1; shift
+ expected=$1; shift
+
+ count=$($IP nexthop bucket show id $group_id nhid $nhid |
+ grep "trap" | wc -l)
+ if ((expected != count)); then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
nexthop_resource_check()
{
local expected_occ=$1; shift
@@ -159,6 +201,71 @@ nexthop_group_add_err_test()
nexthop_resource_set 9999
}
+nexthop_res_group_add_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ $IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5
+ nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected weighted nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 7
+ check_err $? "Wrong weighted nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ log_test "Resilient nexthop group add and delete"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_add_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 2
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null
+ check_fail $? "Nexthop group addition succeeded when should fail"
+
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group add failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
nexthop_group_replace_test()
{
RET=0
@@ -206,6 +313,411 @@ nexthop_group_replace_err_test()
nexthop_resource_set 9999
}
+nexthop_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ $IP nexthop replace id 10 group 1/2/3 type resilient
+ nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group replace"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+ $IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null
+ check_fail $? "Nexthop group replacement succeeded when should fail"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 3
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy after failure"
+
+ log_test "Resilient nexthop group replace failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+}
+
+nexthop_res_mark_buckets_busy()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+ local count=$1; shift
+ local index
+
+ for index in $($IP -j nexthop bucket show id $group_id nhid $nhid |
+ jq '.[].bucket.index' | head -n ${count:--0})
+ do
+ echo $group_id $index \
+ > $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity
+ done
+}
+
+nexthop_res_num_nhid_buckets()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+
+ $IP -j nexthop bucket show id $group_id nhid $nhid | jq length
+}
+
+nexthop_res_group_idle_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 6
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_idle_timer_del_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 \
+ type resilient buckets 8 idle_timer 6
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4 3 0
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 4
+
+ # Deletion prompts group replacement. Check that the bucket timers
+ # are kept.
+ $IP nexthop delete id 3
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to still be unbalanced"
+
+ sleep 4
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_decrease_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 4
+ sleep 4
+
+ # 6 seconds, past the new timer, before the old timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer decrease"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_del_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,100/2,100/3,1 \
+ type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_idle_timer_test()
+{
+ __nexthop_res_group_increase_timer_test idle_timer
+}
+
+nexthop_res_group_decrease_idle_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test idle_timer
+}
+
+nexthop_res_group_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in 1 2; do
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_unbalanced_timer_del_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ # Check that NH delete does not reset unbalanced time.
+ sleep 4
+ $IP nexthop delete id 3
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "1: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "2: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_no_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in $(seq 3); do
+ sleep 60
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ log_test "Buckets never force-migrated without unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_short_unbalanced_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120 unbalanced_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 5
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced < idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_unbalanced_timer_test()
+{
+ __nexthop_res_group_increase_timer_test unbalanced_timer
+}
+
+nexthop_res_group_decrease_unbalanced_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test unbalanced_timer
+}
+
+nexthop_res_group_force_migrate_busy_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ $IP nexthop replace id 10 group 2 type resilient
+ nexthop_bucket_nhid_count_check 10 2 8
+ check_err $? "All buckets expected to have migrated"
+
+ log_test "Busy buckets force-migrated when NH removed"
+
+ $IP nexthop flush &> /dev/null
+}
+
nexthop_single_replace_test()
{
RET=0
@@ -299,6 +811,63 @@ nexthop_single_in_group_replace_err_test()
nexthop_resource_set 9999
}
+nexthop_single_in_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+ check_err $? "Failed to replace nexthop when should not"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop replacement succeeded when should fail"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after failure"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
nexthop_single_in_group_delete_test()
{
RET=0
@@ -346,6 +915,57 @@ nexthop_single_in_group_delete_err_test()
nexthop_resource_set 9999
}
+nexthop_single_in_res_group_delete_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop del id 1
+ nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 2 4
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 5
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_delete_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2/3 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop del id 1
+
+ # We failed to replace the two nexthop buckets that were originally
+ # assigned to nhid 1.
+ nexthop_bucket_nhid_count_check 10 2 2 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 8
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
nexthop_replay_test()
{
RET=0
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d98fb85e201c..56dd0c6f2e96 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,10 +19,39 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
-
-ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+IPV4_TESTS="
+ ipv4_fcnal
+ ipv4_grp_fcnal
+ ipv4_res_grp_fcnal
+ ipv4_withv6_fcnal
+ ipv4_fcnal_runtime
+ ipv4_large_grp
+ ipv4_large_res_grp
+ ipv4_compat_mode
+ ipv4_fdb_grp_fcnal
+ ipv4_torture
+ ipv4_res_torture
+"
+
+IPV6_TESTS="
+ ipv6_fcnal
+ ipv6_grp_fcnal
+ ipv6_res_grp_fcnal
+ ipv6_fcnal_runtime
+ ipv6_large_grp
+ ipv6_large_res_grp
+ ipv6_compat_mode
+ ipv6_fdb_grp_fcnal
+ ipv6_torture
+ ipv6_res_torture
+"
+
+ALL_TESTS="
+ basic
+ basic_res
+ ${IPV4_TESTS}
+ ${IPV6_TESTS}
+"
TESTS="${ALL_TESTS}"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -232,6 +261,19 @@ check_nexthop()
check_output "${out}" "${expected}"
}
+check_nexthop_bucket()
+{
+ local nharg="$1"
+ local expected="$2"
+ local out
+
+ # remove the idle time since we cannot match it
+ out=$($IP nexthop bucket ${nharg} \
+ | sed s/idle_time\ [0-9.]*\ // 2>/dev/null)
+
+ check_output "${out}" "${expected}"
+}
+
check_route()
{
local pfx="$1"
@@ -308,6 +350,25 @@ check_large_grp()
log_test $? 0 "Dump large (x$ecmp) ecmp groups"
}
+check_large_res_grp()
+{
+ local ipv=$1
+ local buckets=$2
+ local ipstr=""
+
+ if [ $ipv -eq 4 ]; then
+ ipstr="172.16.1.2"
+ else
+ ipstr="2001:db8:91::2"
+ fi
+
+ # create a resilient group with $buckets buckets and dump them
+ run_cmd "$IP nexthop add id 100 via $ipstr dev veth1"
+ run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets"
+ run_cmd "$IP nexthop bucket list"
+ log_test $? 0 "Dump large (x$buckets) nexthop buckets"
+}
+
start_ip_monitor()
{
local mtype=$1
@@ -344,6 +405,15 @@ check_nexthop_fdb_support()
fi
}
+check_nexthop_res_support()
+{
+ $IP nexthop help 2>&1 | grep -q resilient
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing resilient nexthop group support"
+ return $ksft_skip
+ fi
+}
+
ipv6_fdb_grp_fcnal()
{
local rc
@@ -666,6 +736,70 @@ ipv6_grp_fcnal()
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
}
+ipv6_res_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv6 resilient groups functional"
+ echo "--------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ #
+ # migration of nexthop buckets - equal weights
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 102" \
+ "id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 62 id 102 index 1 nhid 62"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 63 id 102 index 1 nhid 62"
+ log_test $? 0 "Nexthop buckets updated after replace"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # migration of nexthop buckets - unequal weights
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 102" \
+ "id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+ log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
ipv6_fcnal_runtime()
{
local rc
@@ -824,6 +958,22 @@ ipv6_large_grp()
$IP nexthop flush >/dev/null 2>&1
}
+ipv6_large_res_grp()
+{
+ echo
+ echo "IPv6 large resilient group (128k buckets)"
+ echo "-----------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ check_large_res_grp 6 $((128 * 1024))
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
ipv6_del_add_loop1()
{
while :; do
@@ -874,11 +1024,67 @@ ipv6_torture()
sleep 300
kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
# if we did not crash, success
log_test 0 0 "IPv6 torture test"
}
+ipv6_res_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101 type resilient
+ done >/dev/null 2>&1
+}
+
+ipv6_res_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv6 runtime resilient nexthop group torture"
+ echo "--------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+ run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+ run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+ ipv6_del_add_loop1 &
+ pid1=$!
+ ipv6_res_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn -6 veth1 \
+ -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \
+ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+ # if we did not crash, success
+ log_test 0 0 "IPv6 resilient nexthop group torture test"
+}
ipv4_fcnal()
{
@@ -1038,6 +1244,70 @@ ipv4_grp_fcnal()
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
}
+ipv4_res_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv4 resilient groups functional"
+ echo "--------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ #
+ # migration of nexthop buckets - equal weights
+ #
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 102" \
+ "id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 12 id 102 index 1 nhid 12"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 13 id 102 index 1 nhid 12"
+ log_test $? 0 "Nexthop buckets updated after replace"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # migration of nexthop buckets - unequal weights
+ #
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 102" \
+ "id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+ log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
ipv4_withv6_fcnal()
{
local lladdr
@@ -1259,6 +1529,22 @@ ipv4_large_grp()
$IP nexthop flush >/dev/null 2>&1
}
+ipv4_large_res_grp()
+{
+ echo
+ echo "IPv4 large resilient group (128k buckets)"
+ echo "-----------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ check_large_res_grp 4 $((128 * 1024))
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
sysctl_nexthop_compat_mode_check()
{
local sysctlname="net.ipv4.nexthop_compat_mode"
@@ -1476,11 +1762,68 @@ ipv4_torture()
sleep 300
kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
# if we did not crash, success
log_test 0 0 "IPv4 torture test"
}
+ipv4_res_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101 type resilient
+ done >/dev/null 2>&1
+}
+
+ipv4_res_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv4 runtime resilient nexthop group torture"
+ echo "--------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+ run_cmd "$IP route add 172.16.101.1 nhid 102"
+ run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+ ipv4_del_add_loop1 &
+ pid1=$!
+ ipv4_res_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn veth1 \
+ -B 172.16.101.2 -A 172.16.1.1 -c 0 \
+ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+ # if we did not crash, success
+ log_test 0 0 "IPv4 resilient nexthop group torture test"
+}
+
basic()
{
echo
@@ -1592,6 +1935,204 @@ basic()
$IP nexthop flush >/dev/null 2>&1
}
+check_nexthop_buckets_balance()
+{
+ local nharg=$1; shift
+ local ret
+
+ while (($# > 0)); do
+ local selector=$1; shift
+ local condition=$1; shift
+ local count
+
+ count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length)
+ (( $count $condition ))
+ ret=$?
+ if ((ret != 0)); then
+ return $ret
+ fi
+ done
+
+ return 0
+}
+
+basic_res()
+{
+ echo
+ echo "Basic resilient nexthop group functional tests"
+ echo "----------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ run_cmd "$IP nexthop add id 1 dev veth1"
+
+ #
+ # resilient nexthop group addition
+ #
+
+ run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8"
+ log_test $? 0 "Add a nexthop group with default parameters"
+
+ run_cmd "$IP nexthop get id 101"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group with default parameters"
+
+ run_cmd "$IP nexthop add id 102 group 1 type resilient
+ buckets 4 idle_timer 100 unbalanced_timer 5"
+ run_cmd "$IP nexthop get id 102"
+ check_nexthop "id 102" \
+ "id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group with non-default parameters"
+
+ run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0"
+ log_test $? 2 "Add a nexthop group with 0 buckets"
+
+ #
+ # resilient nexthop group replacement
+ #
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient
+ buckets 8 idle_timer 240 unbalanced_timer 80"
+ log_test $? 0 "Replace nexthop group parameters"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing parameters"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512"
+ log_test $? 0 "Replace idle timer"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing idle timer"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256"
+ log_test $? 0 "Replace unbalanced timer"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing unbalanced timer"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient"
+ log_test $? 0 "Replace with no parameters"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing no parameters"
+
+ run_cmd "$IP nexthop replace id 101 group 1"
+ log_test $? 2 "Replace nexthop group type - implicit"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type mpath"
+ log_test $? 2 "Replace nexthop group type - explicit"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024"
+ log_test $? 2 "Replace number of nexthop buckets"
+
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing with invalid parameters"
+
+ #
+ # resilient nexthop buckets dump
+ #
+
+ $IP nexthop flush >/dev/null 2>&1
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ run_cmd "$IP nexthop add id 2 dev veth3"
+ run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4"
+ run_cmd "$IP nexthop add id 201 group 1/2"
+
+ check_nexthop_bucket "" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets"
+
+ check_nexthop_bucket "list id 101" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets in a group"
+
+ (( $($IP -j nexthop bucket list id 101 |
+ jq '[.[] | select(.bucket.idle_time > 0 and
+ .bucket.idle_time < 2)] | length') == 4 ))
+ log_test $? 0 "All nexthop buckets report a positive near-zero idle time"
+
+ check_nexthop_bucket "list dev veth1" \
+ "id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets with a specific nexthop device"
+
+ check_nexthop_bucket "list nhid 2" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2"
+ log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier"
+
+ run_cmd "$IP nexthop bucket list id 111"
+ log_test $? 2 "Dump all nexthop buckets in a non-existent group"
+
+ run_cmd "$IP nexthop bucket list id 201"
+ log_test $? 2 "Dump all nexthop buckets in a non-resilient group"
+
+ run_cmd "$IP nexthop bucket list dev bla"
+ log_test $? 255 "Dump all nexthop buckets using a non-existent device"
+
+ run_cmd "$IP nexthop bucket list groups"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword"
+
+ run_cmd "$IP nexthop bucket list fdb"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+
+ #
+ # resilient nexthop buckets get requests
+ #
+
+ check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2"
+ log_test $? 0 "Get a valid nexthop bucket"
+
+ run_cmd "$IP nexthop bucket get id 101 index 999"
+ log_test $? 2 "Get a nexthop bucket with valid group, but invalid index"
+
+ run_cmd "$IP nexthop bucket get id 201 index 0"
+ log_test $? 2 "Get a nexthop bucket from a non-resilient group"
+
+ run_cmd "$IP nexthop bucket get id 999 index 0"
+ log_test $? 2 "Get a nexthop bucket from a non-existent group"
+
+ #
+ # tests for bucket migration
+ #
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ run_cmd "$IP nexthop add id 2 dev veth3"
+ run_cmd "$IP nexthop add id 101
+ group 1/2 type resilient buckets 10
+ idle_timer 1 unbalanced_timer 20"
+
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 5" \
+ "nhid 2" "== 5"
+ log_test $? 0 "Initial bucket allocation"
+
+ run_cmd "$IP nexthop replace id 101
+ group 1,2/2,3 type resilient"
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 4" \
+ "nhid 2" "== 6"
+ log_test $? 0 "Bucket allocation after replace"
+
+ # Check that increase in idle timer does not make buckets appear busy.
+ run_cmd "$IP nexthop replace id 101
+ group 1,2/2,3 type resilient
+ idle_timer 10"
+ run_cmd "$IP nexthop replace id 101
+ group 1/2 type resilient"
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 5" \
+ "nhid 2" "== 5"
+ log_test $? 0 "Buckets migrated after idle timer change"
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
################################################################################
# usage
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
new file mode 100755
index 000000000000..088b65e64d66
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -0,0 +1,361 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.1/28 | |
+# | 2001:db8:1::1/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|------------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 192.0.2.2/28 |
+# | 2001:db8:1::2/64 |
+# | |
+# | + g1a (gre) + g1b (gre) |
+# | loc=192.0.2.65 loc=192.0.2.81 |
+# | rem=192.0.2.66 --. rem=192.0.2.82 --. |
+# | tos=inherit | tos=inherit | |
+# | .------------------' | |
+# | | .------------------' |
+# | v v |
+# | + $ul1.111 (vlan) + $ul1.222 (vlan) |
+# | | 192.0.2.129/28 | 192.0.2.145/28 |
+# | \ / |
+# | \________________/ |
+# | | |
+# | + $ul1 |
+# +------------|-------------------------------+
+# |
+# +------------|-------------------------------+
+# | SW2 + $ul2 |
+# | _______|________ |
+# | / \ |
+# | / \ |
+# | + $ul2.111 (vlan) + $ul2.222 (vlan) |
+# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 |
+# | | | |
+# | | '------------------. |
+# | '------------------. | |
+# | + g2a (gre) | + g2b (gre) | |
+# | loc=192.0.2.66 | loc=192.0.2.82 | |
+# | rem=192.0.2.65 --' rem=192.0.2.81 --' |
+# | tos=inherit tos=inherit |
+# | |
+# | $ol2 + |
+# | 192.0.2.17/28 | |
+# | 2001:db8:2::1/64 | |
+# +-------------------|------------------------+
+# |
+# +-------------------|-----+
+# | H2 | |
+# | $h2 + |
+# | 192.0.2.18/28 |
+# | 2001:db8:2::2/64 |
+# +-------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_ipv4
+ multipath_ipv6
+ multipath_ipv6_l4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+ ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+ __simple_if_init $ul1 v$ol1
+ vlan_create $ul1 111 v$ol1 192.0.2.129/28
+ vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+ tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+ __simple_if_init g1a v$ol1 192.0.2.65/32
+ ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+ tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+ __simple_if_init g1b v$ol1 192.0.2.81/32
+ ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+ ip -6 nexthop add id 101 dev g1a
+ ip -6 nexthop add id 102 dev g1b
+ ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+ idle_timer 0
+
+ ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+ ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+ ip route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 192.0.2.16/28
+
+ ip nexthop del id 103
+ ip -6 nexthop del id 102
+ ip -6 nexthop del id 101
+
+ ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+ __simple_if_fini g1b 192.0.2.81/32
+ tunnel_destroy g1b
+
+ ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+ __simple_if_fini g1a 192.0.2.65/32
+ tunnel_destroy g1a
+
+ vlan_destroy $ul1 222
+ vlan_destroy $ul1 111
+ __simple_if_fini $ul1
+ simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+ simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+ __simple_if_init $ul2 v$ol2
+ vlan_create $ul2 111 v$ol2 192.0.2.130/28
+ vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+ tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+ __simple_if_init g2a v$ol2 192.0.2.66/32
+ ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+ tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+ __simple_if_init g2b v$ol2 192.0.2.82/32
+ ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+ ip -6 nexthop add id 201 dev g2a
+ ip -6 nexthop add id 202 dev g2b
+ ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+ idle_timer 0
+
+ ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+ ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+ tc qdisc add dev $ul2 clsact
+ tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+ tc qdisc del dev $ul2 clsact
+
+ ip route del vrf v$ol2 2001:db8:1::/64
+ ip route del vrf v$ol2 192.0.2.0/28
+
+ ip nexthop del id 203
+ ip -6 nexthop del id 202
+ ip -6 nexthop del id 201
+
+ ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+ __simple_if_fini g2b 192.0.2.82/32
+ tunnel_destroy g2b
+
+ ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+ __simple_if_fini g2a 192.0.2.66/32
+ tunnel_destroy g2a
+
+ vlan_destroy $ul2 222
+ vlan_destroy $ul2 111
+ __simple_if_fini $ul2
+ simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+ ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+ ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+multipath4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 0
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for ((i=0; i < 16384; ++i)); do
+ ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6_l4()
+{
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
new file mode 100755
index 000000000000..4898dd4118f1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -0,0 +1,400 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_test
+"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip nexthop del id 103
+ ip nexthop del id 101
+ ip nexthop del id 102
+ ip nexthop del id 106
+ ip nexthop del id 104
+ ip nexthop del id 105
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip nexthop del id 201
+ ip nexthop del id 202
+ ip nexthop del id 204
+ ip nexthop del id 205
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+ ip nexthop add id 101 via 169.254.2.22 dev $rp12
+ ip nexthop add id 102 via 169.254.3.23 dev $rp13
+ ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+ ip nexthop add id 104 via fe80:2::22 dev $rp12
+ ip nexthop add id 105 via fe80:3::23 dev $rp13
+ ip nexthop add id 106 group 104/105 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+ ip nexthop add id 201 via 169.254.2.12 dev $rp22
+ ip nexthop add id 202 via 169.254.3.13 dev $rp23
+ ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+ ip nexthop add id 204 via fe80:2::12 dev $rp22
+ ip nexthop add id 205 via fe80:3::13 dev $rp23
+ ip nexthop add id 206 group 204/205 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the provided weights.
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the provided weights.
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath_test()
+{
+ # Without an idle timer, weight replacement should happen immediately.
+ log_info "Running multipath tests without an idle timer"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+ log_info "Running IPv4 multipath tests"
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 2 1
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+ # With an idle timer, weight replacement should not happen, so the
+ # expected ratio should always be the initial one (1:1).
+ log_info "Running multipath tests with an idle timer of 120 seconds"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 120
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 120
+
+ log_info "Running IPv4 multipath tests"
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 1 1
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 1 1
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 1 1
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 1 1
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+ # With a short idle timer and enough idle time, weight replacement
+ # should happen.
+ log_info "Running multipath tests with an idle timer of 5 seconds"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 5
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 5
+
+ log_info "Running IPv4 multipath tests"
+ sleep 10
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ sleep 10
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 2 1
+ sleep 10
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ sleep 10
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ sleep 10
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ sleep 10
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+ echo "Nexthop objects not supported; skipping tests"
+ exit 0
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS