From 1d4adfaf65746203861c72d9d78de349eb97d528 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Aug 2020 15:13:00 +0100 Subject: rxrpc: Make rxrpc_kernel_get_srtt() indicate validity Fix rxrpc_kernel_get_srtt() to indicate the validity of the returned smoothed RTT. If we haven't had any valid samples yet, the SRTT isn't useful. Fixes: c410bf01933e ("rxrpc: Fix the excessive initial retransmission timeout") Signed-off-by: David Howells --- fs/afs/fs_probe.c | 4 ++-- fs/afs/vl_probe.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 5d9ef517cf81..e7e98ad63a91 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -161,8 +161,8 @@ responded: } } - rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall); - if (rtt_us < server->probe.rtt) { + if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && + rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index e3aa013c2177..081b7e5b13f5 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -92,8 +92,8 @@ responded: } } - rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall); - if (rtt_us < server->probe.rtt) { + if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && + rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; alist->preferred = index; have_result = true; -- cgit v1.2.3 From 4f4c2c05eb7703b485b4285b8e2eee908c7b4508 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Aug 2020 15:13:44 +0100 Subject: afs: Remove afs_vlserver->probe.have_result Remove afs_vlserver->probe.have_result as it's neither read nor waited upon. Fixes: 3bf0fb6f33dd ("afs: Probe multiple fileservers simultaneously") Signed-off-by: David Howells --- fs/afs/internal.h | 1 - fs/afs/vl_probe.c | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 792ac711985e..2e6ae6388c72 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -412,7 +412,6 @@ struct afs_vlserver { unsigned int rtt; /* RTT as ktime/64 */ u32 abort_code; short error; - bool have_result; bool responded:1; bool is_yfs:1; bool not_yfs:1; diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index 081b7e5b13f5..ee59188433b9 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -109,11 +109,8 @@ out: server_index, index, &alist->addrs[index].transport, rtt_us, ret); have_result |= afs_vl_probe_done(server); - if (have_result) { - server->probe.have_result = true; - wake_up_var(&server->probe.have_result); + if (have_result) wake_up_all(&server->probe_wq); - } } /* -- cgit v1.2.3 From fb72cd3d484ce548597c75ebeecc70483fe8bb6e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Aug 2020 15:01:54 +0100 Subject: afs: Expose information from afs_vlserver through /proc for debugging Convert various bitfields in afs_vlserver::probe to a mask and then expose this and some other bits of information through /proc/net/afs//vlservers to make it easier to debug VL server communication issues. Signed-off-by: David Howells --- fs/afs/internal.h | 9 +++++---- fs/afs/proc.c | 5 +++++ fs/afs/vl_probe.c | 20 ++++++++++---------- fs/afs/vl_rotate.c | 3 ++- 4 files changed, 22 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 2e6ae6388c72..b29dfcfe5fc2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -412,10 +412,11 @@ struct afs_vlserver { unsigned int rtt; /* RTT as ktime/64 */ u32 abort_code; short error; - bool responded:1; - bool is_yfs:1; - bool not_yfs:1; - bool local_failure:1; + unsigned short flags; +#define AFS_VLSERVER_PROBE_RESPONDED 0x01 /* At least once response (may be abort) */ +#define AFS_VLSERVER_PROBE_IS_YFS 0x02 /* The peer appears to be YFS */ +#define AFS_VLSERVER_PROBE_NOT_YFS 0x04 /* The peer appears not to be YFS */ +#define AFS_VLSERVER_PROBE_LOCAL_FAILURE 0x08 /* A local failure prevented a probe */ } probe; u16 port; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index e817fc740ba0..5e837155f1a0 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -310,6 +310,11 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) alist->preferred == i ? '>' : '-', &alist->addrs[i].transport); } + seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->probe.rtt); + seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n", + vlserver->probe.flags, vlserver->probe.error, + vlserver->probe.abort_code, + atomic_read(&vlserver->probe_outstanding)); return 0; } diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index ee59188433b9..a6d04b4fbf56 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -45,14 +45,14 @@ void afs_vlserver_probe_result(struct afs_call *call) server->probe.error = 0; goto responded; case -ECONNABORTED: - if (!server->probe.responded) { + if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) { server->probe.abort_code = call->abort_code; server->probe.error = ret; } goto responded; case -ENOMEM: case -ENONET: - server->probe.local_failure = true; + server->probe.flags |= AFS_VLSERVER_PROBE_LOCAL_FAILURE; afs_io_error(call, afs_io_error_vl_probe_fail); goto out; case -ECONNRESET: /* Responded, but call expired. */ @@ -67,7 +67,7 @@ void afs_vlserver_probe_result(struct afs_call *call) default: clear_bit(index, &alist->responded); set_bit(index, &alist->failed); - if (!server->probe.responded && + if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) && (server->probe.error == 0 || server->probe.error == -ETIMEDOUT || server->probe.error == -ETIME)) @@ -81,12 +81,12 @@ responded: clear_bit(index, &alist->failed); if (call->service_id == YFS_VL_SERVICE) { - server->probe.is_yfs = true; + server->probe.flags |= AFS_VLSERVER_PROBE_IS_YFS; set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); alist->addrs[index].srx_service = call->service_id; } else { - server->probe.not_yfs = true; - if (!server->probe.is_yfs) { + server->probe.flags |= AFS_VLSERVER_PROBE_NOT_YFS; + if (!(server->probe.flags & AFS_VLSERVER_PROBE_IS_YFS)) { clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); alist->addrs[index].srx_service = call->service_id; } @@ -100,7 +100,7 @@ responded: } smp_wmb(); /* Set rtt before responded. */ - server->probe.responded = true; + server->probe.flags |= AFS_VLSERVER_PROBE_RESPONDED; set_bit(AFS_VLSERVER_FL_PROBED, &server->flags); out: spin_unlock(&server->probe_lock); @@ -202,7 +202,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, server = vllist->servers[i].server; if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) __clear_bit(i, &untried); - if (server->probe.responded) + if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) have_responders = true; } } @@ -228,7 +228,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, for (i = 0; i < vllist->nr_servers; i++) { if (test_bit(i, &untried)) { server = vllist->servers[i].server; - if (server->probe.responded) + if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) goto stop; if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) still_probing = true; @@ -246,7 +246,7 @@ stop: for (i = 0; i < vllist->nr_servers; i++) { if (test_bit(i, &untried)) { server = vllist->servers[i].server; - if (server->probe.responded && + if ((server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) && server->probe.rtt < rtt) { pref = i; rtt = server->probe.rtt; diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index f405ca8b240a..ed2609e82695 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -192,7 +192,8 @@ pick_server: for (i = 0; i < vc->server_list->nr_servers; i++) { struct afs_vlserver *s = vc->server_list->servers[i].server; - if (!test_bit(i, &vc->untried) || !s->probe.responded) + if (!test_bit(i, &vc->untried) || + !(s->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) continue; if (s->probe.rtt < rtt) { vc->index = i; -- cgit v1.2.3 From b95b30940ee46a4d892ca3de3ffb8249c4985b1f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Aug 2020 15:27:17 +0100 Subject: afs: Don't use VL probe running state to make decisions outside probe code Don't use the running state for VL server probes to make decisions about which server to use as the state is cleared at the start of a probe and intermediate values might also be misleading. Instead, add a separate 'latest known' rtt in the afs_vlserver struct and a flag to indicate if the server is known to be responding and update these as and when we know what to change them to. Fixes: 3bf0fb6f33dd ("afs: Probe multiple fileservers simultaneously") Signed-off-by: David Howells --- fs/afs/internal.h | 4 +++- fs/afs/proc.c | 2 +- fs/afs/vl_list.c | 1 + fs/afs/vl_probe.c | 57 ++++++++++++++++++++++++++++++++++++++---------------- fs/afs/vl_rotate.c | 2 +- 5 files changed, 46 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/afs/internal.h b/fs/afs/internal.h index b29dfcfe5fc2..18042b7dab6a 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -401,15 +401,17 @@ struct afs_vlserver { #define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */ #define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */ #define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */ +#define AFS_VLSERVER_FL_RESPONDING 3 /* VL server is responding */ rwlock_t lock; /* Lock on addresses */ atomic_t usage; + unsigned int rtt; /* Server's current RTT in uS */ /* Probe state */ wait_queue_head_t probe_wq; atomic_t probe_outstanding; spinlock_t probe_lock; struct { - unsigned int rtt; /* RTT as ktime/64 */ + unsigned int rtt; /* RTT in uS */ u32 abort_code; short error; unsigned short flags; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 5e837155f1a0..e8babb62ed44 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -310,7 +310,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) alist->preferred == i ? '>' : '-', &alist->addrs[i].transport); } - seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->probe.rtt); + seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt); seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n", vlserver->probe.flags, vlserver->probe.error, vlserver->probe.abort_code, diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c index 8fea54eba0c2..38b2ba1d9ec0 100644 --- a/fs/afs/vl_list.c +++ b/fs/afs/vl_list.c @@ -21,6 +21,7 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, rwlock_init(&vlserver->lock); init_waitqueue_head(&vlserver->probe_wq); spin_lock_init(&vlserver->probe_lock); + vlserver->rtt = UINT_MAX; vlserver->name_len = name_len; vlserver->port = port; memcpy(vlserver->name, name, name_len); diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index a6d04b4fbf56..d1c7068b4346 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -11,15 +11,33 @@ #include "internal.h" #include "protocol_yfs.h" -static bool afs_vl_probe_done(struct afs_vlserver *server) + +/* + * Handle the completion of a set of probes. + */ +static void afs_finished_vl_probe(struct afs_vlserver *server) { - if (!atomic_dec_and_test(&server->probe_outstanding)) - return false; + if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) { + server->rtt = UINT_MAX; + clear_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags); + } - wake_up_var(&server->probe_outstanding); clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags); wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING); - return true; +} + +/* + * Handle the completion of a probe RPC call. + */ +static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up) +{ + if (atomic_dec_and_test(&server->probe_outstanding)) { + afs_finished_vl_probe(server); + wake_up = true; + } + + if (wake_up) + wake_up_all(&server->probe_wq); } /* @@ -52,8 +70,13 @@ void afs_vlserver_probe_result(struct afs_call *call) goto responded; case -ENOMEM: case -ENONET: + case -EKEYEXPIRED: + case -EKEYREVOKED: + case -EKEYREJECTED: server->probe.flags |= AFS_VLSERVER_PROBE_LOCAL_FAILURE; - afs_io_error(call, afs_io_error_vl_probe_fail); + if (server->probe.error == 0) + server->probe.error = ret; + trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail); goto out; case -ECONNRESET: /* Responded, but call expired. */ case -ERFKILL: @@ -72,7 +95,7 @@ void afs_vlserver_probe_result(struct afs_call *call) server->probe.error == -ETIMEDOUT || server->probe.error == -ETIME)) server->probe.error = ret; - afs_io_error(call, afs_io_error_vl_probe_fail); + trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail); goto out; } @@ -95,22 +118,22 @@ responded: if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; + server->rtt = rtt_us; alist->preferred = index; - have_result = true; } smp_wmb(); /* Set rtt before responded. */ server->probe.flags |= AFS_VLSERVER_PROBE_RESPONDED; set_bit(AFS_VLSERVER_FL_PROBED, &server->flags); + set_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags); + have_result = true; out: spin_unlock(&server->probe_lock); _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", server_index, index, &alist->addrs[index].transport, rtt_us, ret); - have_result |= afs_vl_probe_done(server); - if (have_result) - wake_up_all(&server->probe_wq); + afs_done_one_vl_probe(server, have_result); } /* @@ -148,11 +171,10 @@ static bool afs_do_probe_vlserver(struct afs_net *net, in_progress = true; } else { afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code); + afs_done_one_vl_probe(server, false); } } - if (!in_progress) - afs_vl_probe_done(server); return in_progress; } @@ -190,7 +212,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, { struct wait_queue_entry *waits; struct afs_vlserver *server; - unsigned int rtt = UINT_MAX; + unsigned int rtt = UINT_MAX, rtt_s; bool have_responders = false; int pref = -1, i; @@ -246,10 +268,11 @@ stop: for (i = 0; i < vllist->nr_servers; i++) { if (test_bit(i, &untried)) { server = vllist->servers[i].server; - if ((server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) && - server->probe.rtt < rtt) { + rtt_s = READ_ONCE(server->rtt); + if (test_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags) && + rtt_s < rtt) { pref = i; - rtt = server->probe.rtt; + rtt = rtt_s; } remove_wait_queue(&server->probe_wq, &waits[i]); diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index ed2609e82695..ab2beb4ba20e 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -193,7 +193,7 @@ pick_server: struct afs_vlserver *s = vc->server_list->servers[i].server; if (!test_bit(i, &vc->untried) || - !(s->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) + !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) continue; if (s->probe.rtt < rtt) { vc->index = i; -- cgit v1.2.3 From e4686c79b103c0a73b8ed41e5fd967da201b6fba Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Aug 2020 16:13:05 +0100 Subject: afs: Fix error handling in VL server rotation The error handling in the VL server rotation in the case of there being no contactable servers is not correct. In such a case, the records of all the servers in the list are scanned and the errors and abort codes are mapped and prioritised and one error is chosen. This is then forgotten and the default error is used (EDESTADDRREQ). Fix this by using the calculated error. Also we need to note whether a server responded on one of its endpoints so that we can priorise an error from an abort message over local and network errors. Fixes: 4584ae96ae30 ("afs: Fix missing net error handling") Signed-off-by: David Howells --- fs/afs/vl_rotate.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index ab2beb4ba20e..c0458c903b31 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -263,10 +263,14 @@ no_more_servers: for (i = 0; i < vc->server_list->nr_servers; i++) { struct afs_vlserver *s = vc->server_list->servers[i].server; + if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) + e.responded = true; afs_prioritise_error(&e, READ_ONCE(s->probe.error), s->probe.abort_code); } + error = e.error; + failed_set_error: vc->error = error; failed: -- cgit v1.2.3