diff options
author | Jiri Bohac <jbohac@suse.cz> | 2012-08-30 12:02:47 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-08-31 16:37:12 -0400 |
commit | da210f559019ba1cd4ebee2a28ad158bfb95bab2 (patch) | |
tree | 9dd79ea671975ded4249dd6643f69de104c5580d /drivers/net/bonding | |
parent | eb7e0575966f9c84434e92c8a3f69719cc2e7571 (diff) | |
download | lwn-da210f559019ba1cd4ebee2a28ad158bfb95bab2.tar.gz lwn-da210f559019ba1cd4ebee2a28ad158bfb95bab2.zip |
bonding: add some slack to arp monitoring time limits
Currently, all the time limits in the bonding ARP monitor are in
multiples of arp_interval -- the time interval at which the ARP
monitor is periodically scheduled.
With a fast network round-trip and a little scheduling latency
of the ARP monitor work, a limit of n*delta_in_ticks may
effectively mean (n-1)*delta_in_ticks.
This is fatal in case of n==1 (the link will stay down
forever) and makes the behaviour non-deterministic in all the
other cases.
Add a delta_in_ticks/2 time slack to all the time limits.
Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r-- | drivers/net/bonding/bond_main.c | 31 |
1 files changed, 20 insertions, 11 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b24ce257ac7b..7858c58df4a3 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2811,12 +2811,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work) arp_work.work); struct slave *slave, *oldcurrent; int do_failover = 0; - int delta_in_ticks; + int delta_in_ticks, extra_ticks; int i; read_lock(&bond->lock); delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); + extra_ticks = delta_in_ticks / 2; if (bond->slave_cnt == 0) goto re_arm; @@ -2839,10 +2840,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work) if (slave->link != BOND_LINK_UP) { if (time_in_range(jiffies, trans_start - delta_in_ticks, - trans_start + delta_in_ticks) && + trans_start + delta_in_ticks + extra_ticks) && time_in_range(jiffies, slave->dev->last_rx - delta_in_ticks, - slave->dev->last_rx + delta_in_ticks)) { + slave->dev->last_rx + delta_in_ticks + extra_ticks)) { slave->link = BOND_LINK_UP; bond_set_active_slave(slave); @@ -2872,10 +2873,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work) */ if (!time_in_range(jiffies, trans_start - delta_in_ticks, - trans_start + 2 * delta_in_ticks) || + trans_start + 2 * delta_in_ticks + extra_ticks) || !time_in_range(jiffies, slave->dev->last_rx - delta_in_ticks, - slave->dev->last_rx + 2 * delta_in_ticks)) { + slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) { slave->link = BOND_LINK_DOWN; bond_set_backup_slave(slave); @@ -2933,6 +2934,14 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) struct slave *slave; int i, commit = 0; unsigned long trans_start; + int extra_ticks; + + /* All the time comparisons below need some extra time. Otherwise, on + * fast networks the ARP probe/reply may arrive within the same jiffy + * as it was sent. Then, the next time the ARP monitor is run, one + * arp_interval will already have passed in the comparisons. + */ + extra_ticks = delta_in_ticks / 2; bond_for_each_slave(bond, slave, i) { slave->new_link = BOND_LINK_NOCHANGE; @@ -2940,7 +2949,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) if (slave->link != BOND_LINK_UP) { if (time_in_range(jiffies, slave_last_rx(bond, slave) - delta_in_ticks, - slave_last_rx(bond, slave) + delta_in_ticks)) { + slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) { slave->new_link = BOND_LINK_UP; commit++; @@ -2956,7 +2965,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) */ if (time_in_range(jiffies, slave->jiffies - delta_in_ticks, - slave->jiffies + 2 * delta_in_ticks)) + slave->jiffies + 2 * delta_in_ticks + extra_ticks)) continue; /* @@ -2976,7 +2985,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) !bond->current_arp_slave && !time_in_range(jiffies, slave_last_rx(bond, slave) - delta_in_ticks, - slave_last_rx(bond, slave) + 3 * delta_in_ticks)) { + slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) { slave->new_link = BOND_LINK_DOWN; commit++; @@ -2992,10 +3001,10 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) if (bond_is_active_slave(slave) && (!time_in_range(jiffies, trans_start - delta_in_ticks, - trans_start + 2 * delta_in_ticks) || + trans_start + 2 * delta_in_ticks + extra_ticks) || !time_in_range(jiffies, slave_last_rx(bond, slave) - delta_in_ticks, - slave_last_rx(bond, slave) + 2 * delta_in_ticks))) { + slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) { slave->new_link = BOND_LINK_DOWN; commit++; @@ -3027,7 +3036,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) if ((!bond->curr_active_slave && time_in_range(jiffies, trans_start - delta_in_ticks, - trans_start + delta_in_ticks)) || + trans_start + delta_in_ticks + delta_in_ticks / 2)) || bond->curr_active_slave != slave) { slave->link = BOND_LINK_UP; if (bond->current_arp_slave) { |