From 35a566a24e58f1b5f89737edf60b77de58719ed0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Feb 2025 18:14:26 -0500 Subject: NFSv4: Avoid unnecessary scans of filesystems for returning delegations The amount of looping through the list of delegations is occasionally leading to soft lockups. If the state manager was asked to return delegations asynchronously, it should only scan those filesystems that hold delegations that need to be returned. Fixes: af3b61bf6131 ("NFSv4: Clean up nfs_client_return_marked_delegations()") Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index f00bfcee7120..4e9ad6f6e907 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -250,6 +250,8 @@ struct nfs_server { struct list_head ss_copies; struct list_head ss_src_copies; + unsigned long delegation_flags; +#define NFS4SERV_DELEGRETURN (1) unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; -- cgit v1.2.3 From f163aa81a799e2d46d7f8f0b42a0e7770eaa0d06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Feb 2025 19:03:21 -0500 Subject: NFSv4: Avoid unnecessary scans of filesystems for expired delegations The amount of looping through the list of delegations is occasionally leading to soft lockups. If the state manager was asked to reap the expired delegations, it should scan only those filesystems that hold delegations that need to be reaped. Fixes: 7f156ef0bf45 ("NFSv4: Clean up nfs_delegation_reap_expired()") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 7 +++++++ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d1f5e497729c..abd952cc47e4 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1284,6 +1284,7 @@ static void nfs_mark_test_expired_delegation(struct nfs_server *server, return; clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); + set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags); set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state); } @@ -1362,6 +1363,9 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, nfs4_stateid stateid; unsigned long gen = ++server->delegation_gen; + if (!test_and_clear_bit(NFS4SERV_DELEGATION_EXPIRED, + &server->delegation_flags)) + return 0; restart: rcu_read_lock(); list_for_each_entry_rcu(delegation, &server->delegations, super_list) { @@ -1391,6 +1395,9 @@ restart: goto restart; } nfs_inode_mark_test_expired_delegation(server,inode); + set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags); + set_bit(NFS4CLNT_DELEGATION_EXPIRED, + &server->nfs_client->cl_state); iput(inode); return -EAGAIN; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4e9ad6f6e907..7d6f164036fa 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -252,6 +252,7 @@ struct nfs_server { unsigned long delegation_flags; #define NFS4SERV_DELEGRETURN (1) +#define NFS4SERV_DELEGATION_EXPIRED (2) unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; -- cgit v1.2.3 From e767b59e29b8327d25edde65efc743f479f30d0a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Feb 2025 18:37:51 -0500 Subject: NFSv4: Avoid unnecessary scans of filesystems for delayed delegations The amount of looping through the list of delegations is occasionally leading to soft lockups. If the state manager was asked to manage the delayed return of delegations, then only scan those filesystems containing delegations that were marked as being delayed. Fixes: be20037725d1 ("NFSv4: Fix delegation return in cases where we have to retry") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 18 ++++++++++++------ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index abd952cc47e4..325ba0663a6d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -331,14 +331,16 @@ nfs_start_delegation_return(struct nfs_inode *nfsi) } static void nfs_abort_delegation_return(struct nfs_delegation *delegation, - struct nfs_client *clp, int err) + struct nfs_server *server, int err) { - spin_lock(&delegation->lock); clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); if (err == -EAGAIN) { set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state); + set_bit(NFS4SERV_DELEGRETURN_DELAYED, + &server->delegation_flags); + set_bit(NFS4CLNT_DELEGRETURN_DELAYED, + &server->nfs_client->cl_state); } spin_unlock(&delegation->lock); } @@ -548,7 +550,7 @@ out: */ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); unsigned int mode = O_WRONLY | O_RDWR; int err = 0; @@ -570,11 +572,11 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation /* * Guard against state recovery */ - err = nfs4_wait_clnt_recover(clp); + err = nfs4_wait_clnt_recover(server->nfs_client); } if (err) { - nfs_abort_delegation_return(delegation, clp, err); + nfs_abort_delegation_return(delegation, server, err); goto out; } @@ -678,6 +680,9 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server) struct nfs_delegation *d; bool ret = false; + if (!test_and_clear_bit(NFS4SERV_DELEGRETURN_DELAYED, + &server->delegation_flags)) + goto out; list_for_each_entry_rcu (d, &server->delegations, super_list) { if (!test_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags)) continue; @@ -685,6 +690,7 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server) clear_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags); ret = true; } +out: return ret; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7d6f164036fa..108862d81b57 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -253,6 +253,7 @@ struct nfs_server { unsigned long delegation_flags; #define NFS4SERV_DELEGRETURN (1) #define NFS4SERV_DELEGATION_EXPIRED (2) +#define NFS4SERV_DELEGRETURN_DELAYED (3) unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; -- cgit v1.2.3 From 8955e7ce61fb6ba28f88e0a38479c992991ba6ab Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 13 Jan 2025 10:32:39 -0500 Subject: NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR Add XDR encoding and decoding functions for the NFSv4.2 OFFLOAD_STATUS operation. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250113153235.48706-13-cel@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 5 +-- 4 files changed, 91 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 5072d7ea72e9..b1b663468249 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -35,6 +35,11 @@ #define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) #define decode_offload_cancel_maxsz (op_decode_hdr_maxsz) +#define encode_offload_status_maxsz (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_offload_status_maxsz (op_decode_hdr_maxsz + \ + 2 /* osr_count */ + \ + 2 /* osr_complete */) #define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE) + \ 1 + /* nl4_type */ \ @@ -143,6 +148,14 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_offload_cancel_maxsz) +#define NFS4_enc_offload_status_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_offload_status_maxsz) +#define NFS4_dec_offload_status_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_offload_status_maxsz) #define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -345,6 +358,14 @@ static void encode_offload_cancel(struct xdr_stream *xdr, encode_nfs4_stateid(xdr, &args->osa_stateid); } +static void encode_offload_status(struct xdr_stream *xdr, + const struct nfs42_offload_status_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_OFFLOAD_STATUS, decode_offload_status_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->osa_stateid); +} + static void encode_copy_notify(struct xdr_stream *xdr, const struct nfs42_copy_notify_args *args, struct compound_hdr *hdr) @@ -569,6 +590,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode OFFLOAD_STATUS request + */ +static void nfs4_xdr_enc_offload_status(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_offload_status_args *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->osa_seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->osa_seq_args, &hdr); + encode_putfh(xdr, args->osa_src_fh, &hdr); + encode_offload_status(xdr, args, &hdr); + encode_nops(&hdr); +} + /* * Encode COPY_NOTIFY request */ @@ -921,6 +961,26 @@ static int decode_offload_cancel(struct xdr_stream *xdr, return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL); } +static int decode_offload_status(struct xdr_stream *xdr, + struct nfs42_offload_status_res *res) +{ + ssize_t result; + int status; + + status = decode_op_hdr(xdr, OP_OFFLOAD_STATUS); + if (status) + return status; + /* osr_count */ + if (xdr_stream_decode_u64(xdr, &res->osr_count) < 0) + return -EIO; + /* osr_complete<1> */ + result = xdr_stream_decode_uint32_array(xdr, &res->osr_complete, 1); + if (result < 0) + return -EIO; + res->complete_count = result; + return 0; +} + static int decode_copy_notify(struct xdr_stream *xdr, struct nfs42_copy_notify_res *res) { @@ -1370,6 +1430,32 @@ out: return status; } +/* + * Decode OFFLOAD_STATUS response + */ +static int nfs4_xdr_dec_offload_status(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfs42_offload_status_res *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->osr_seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_offload_status(xdr, res); + +out: + return status; +} + /* * Decode COPY_NOTIFY response */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 71f45cc0ca74..55bef5fbfa47 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7702,6 +7702,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(CLONE, enc_clone, dec_clone), PROC42(COPY, enc_copy, dec_copy), PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel), + PROC42(OFFLOAD_STATUS, enc_offload_status, dec_offload_status), PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify), PROC(LOOKUPP, enc_lookupp, dec_lookupp), PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror), diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 9ac83ca88326..5fa60fe441b5 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -691,6 +691,7 @@ enum { NFSPROC4_CLNT_LISTXATTRS, NFSPROC4_CLNT_REMOVEXATTR, NFSPROC4_CLNT_READ_PLUS, + NFSPROC4_CLNT_OFFLOAD_STATUS, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9155a6ffc370..dc5288111999 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1515,8 +1515,9 @@ struct nfs42_offload_status_args { struct nfs42_offload_status_res { struct nfs4_sequence_res osr_seq_res; - uint64_t osr_count; - int osr_status; + u64 osr_count; + int complete_count; + u32 osr_complete; }; struct nfs42_copy_notify_args { -- cgit v1.2.3 From 77dd8a302f56679178924f82a29eaf437857ea75 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 13 Jan 2025 10:32:40 -0500 Subject: NFS: Implement NFSv4.2's OFFLOAD_STATUS operation Enable the Linux NFS client to observe the progress of an offloaded asynchronous COPY operation. This new operation will be put to use in a subsequent patch. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250113153235.48706-14-cel@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 3 +- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 106 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 1924c4a2077b..3e359e16c324 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -21,6 +21,8 @@ #define NFSDBG_FACILITY NFSDBG_PROC static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std); +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid, + u64 *copied); static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr) { @@ -582,6 +584,107 @@ static int nfs42_do_offload_cancel_async(struct file *dst, return status; } +static int +_nfs42_proc_offload_status(struct nfs_server *server, struct file *file, + struct nfs42_offload_data *data) +{ + struct nfs_open_context *ctx = nfs_file_open_context(file); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = ctx->cred, + }; + int status; + + status = nfs4_call_sync(server->client, server, &msg, + &data->args.osa_seq_args, + &data->res.osr_seq_res, 1); + switch (status) { + case 0: + break; + + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + /* + * Server does not recognize the COPY stateid. CB_OFFLOAD + * could have purged it, or server might have rebooted. + * Since COPY stateids don't have an associated inode, + * avoid triggering state recovery. + */ + status = -EBADF; + break; + case -NFS4ERR_NOTSUPP: + case -ENOTSUPP: + case -EOPNOTSUPP: + server->caps &= ~NFS_CAP_OFFLOAD_STATUS; + status = -EOPNOTSUPP; + break; + } + + return status; +} + +/** + * nfs42_proc_offload_status - Poll completion status of an async copy operation + * @dst: handle of file being copied into + * @stateid: copy stateid (from async COPY result) + * @copied: OUT: number of bytes copied so far + * + * Return values: + * %0: Server returned an NFS4_OK completion status + * %-EINPROGRESS: Server returned no completion status + * %-EREMOTEIO: Server returned an error completion status + * %-EBADF: Server did not recognize the copy stateid + * %-EOPNOTSUPP: Server does not support OFFLOAD_STATUS + * %-ERESTARTSYS: Wait interrupted by signal + * + * Other negative errnos indicate the client could not complete the + * request. + */ +static int __maybe_unused +nfs42_proc_offload_status(struct file *dst, nfs4_stateid *stateid, u64 *copied) +{ + struct inode *inode = file_inode(dst); + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_exception exception = { + .inode = inode, + }; + struct nfs42_offload_data *data; + int status; + + if (!(server->caps & NFS_CAP_OFFLOAD_STATUS)) + return -EOPNOTSUPP; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->seq_server = server; + data->args.osa_src_fh = NFS_FH(inode); + memcpy(&data->args.osa_stateid, stateid, + sizeof(data->args.osa_stateid)); + exception.stateid = &data->args.osa_stateid; + do { + status = _nfs42_proc_offload_status(server, dst, data); + if (status == -EOPNOTSUPP) + goto out; + status = nfs4_handle_exception(server, status, &exception); + } while (exception.retry); + if (status) + goto out; + + *copied = data->res.osr_count; + if (!data->res.complete_count) + status = -EINPROGRESS; + else if (data->res.osr_complete != NFS_OK) + status = -EREMOTEIO; + +out: + kfree(data); + return status; +} + static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, struct nfs42_copy_notify_args *args, struct nfs42_copy_notify_res *res) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6e95db6c17e9..24406fc1352d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10781,7 +10781,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_CLONE | NFS_CAP_LAYOUTERROR | NFS_CAP_READ_PLUS - | NFS_CAP_MOVEABLE, + | NFS_CAP_MOVEABLE + | NFS_CAP_OFFLOAD_STATUS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 108862d81b57..1711eefcf24f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -293,6 +293,7 @@ struct nfs_server { #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) #define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) +#define NFS_CAP_OFFLOAD_STATUS (1U << 9) #define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) -- cgit v1.2.3 From cfe1f8776f23fd6dfe4ba9fcb695b129f0761187 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 13 Mar 2025 13:01:22 -0400 Subject: NFS: Extend rdirplus mount option with "force|none" There are certain users that wish to force the NFS client to choose READDIRPLUS over READDIR for a particular mount. Update the "rdirplus" mount option to optionally accept values. For "rdirplus=force", the NFS client will always attempt to use READDDIRPLUS. The setting of "rdirplus=none" is aliased to the existing "nordirplus". Signed-off-by: Benjamin Coddington Link: https://lore.kernel.org/r/c4cf0de4c8be0930b91bc74bee310d289781cd3b.1741885071.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 ++ fs/nfs/fs_context.c | 32 ++++++++++++++++++++++++++++---- fs/nfs/super.c | 1 + include/linux/nfs_fs_sb.h | 1 + 4 files changed, 32 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2b04038b0e40..5c4566a8dabb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -666,6 +666,8 @@ static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx, { if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) return false; + if (NFS_SERVER(dir)->flags & NFS_MOUNT_FORCE_RDIRPLUS) + return true; if (ctx->pos == 0 || cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD) return true; diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index b069385eea17..1cabba1231d6 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -72,6 +72,8 @@ enum nfs_param { Opt_posix, Opt_proto, Opt_rdirplus, + Opt_rdirplus_none, + Opt_rdirplus_force, Opt_rdma, Opt_resvport, Opt_retrans, @@ -174,7 +176,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_u32 ("port", Opt_port), fsparam_flag_no("posix", Opt_posix), fsparam_string("proto", Opt_proto), - fsparam_flag_no("rdirplus", Opt_rdirplus), + fsparam_flag_no("rdirplus", Opt_rdirplus), // rdirplus|nordirplus + fsparam_string("rdirplus", Opt_rdirplus), // rdirplus=... fsparam_flag ("rdma", Opt_rdma), fsparam_flag_no("resvport", Opt_resvport), fsparam_u32 ("retrans", Opt_retrans), @@ -288,6 +291,12 @@ static const struct constant_table nfs_xprtsec_policies[] = { {} }; +static const struct constant_table nfs_rdirplus_tokens[] = { + { "none", Opt_rdirplus_none }, + { "force", Opt_rdirplus_force }, + {} +}; + /* * Sanity-check a server address provided by the mount command. * @@ -636,10 +645,25 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->flags &= ~NFS_MOUNT_NOACL; break; case Opt_rdirplus: - if (result.negated) + if (result.negated) { + ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS; ctx->flags |= NFS_MOUNT_NORDIRPLUS; - else - ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; + } else if (!param->string) { + ctx->flags &= ~(NFS_MOUNT_NORDIRPLUS | NFS_MOUNT_FORCE_RDIRPLUS); + } else { + switch (lookup_constant(nfs_rdirplus_tokens, param->string, -1)) { + case Opt_rdirplus_none: + ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS; + ctx->flags |= NFS_MOUNT_NORDIRPLUS; + break; + case Opt_rdirplus_force: + ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; + ctx->flags |= NFS_MOUNT_FORCE_RDIRPLUS; + break; + default: + goto out_invalid_value; + } + } break; case Opt_sharecache: if (result.negated) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index aeb715b4a690..96de658a7886 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -454,6 +454,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_FORCE_RDIRPLUS, ",rdirplus=force", "" }, { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, { 0, NULL, NULL } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1711eefcf24f..b83d16a42afc 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -167,6 +167,7 @@ struct nfs_server { #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 #define NFS_MOUNT_SHUTDOWN 0x08000000 #define NFS_MOUNT_NO_ALIGNWRITE 0x10000000 +#define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From df210d9b0951d714c1668c511ca5c8ff38cf6916 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 7 Feb 2025 15:42:24 -0500 Subject: sunrpc: Add a sysfs file for adding a new xprt Writing to this file will clone the 'main' xprt of an xprt_switch and add it to be used as an additional connection. -- Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker v3: Replace call to xprt_iter_get_xprt() with xprt_iter_get_next() Link: https://lore.kernel.org/r/20250207204225.594002-5-anna@kernel.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtmultipath.h | 1 + net/sunrpc/sysfs.c | 54 ++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtmultipath.c | 21 ++++++++++++++ 3 files changed, 76 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index e411368cdacf..e4db5022fe92 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -56,6 +56,7 @@ extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt); extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, bool offline); +extern struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps); extern void xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps); diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index dcd579eab50a..8fd1975f2fe8 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -305,6 +305,55 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, return ret; } +static ssize_t rpc_sysfs_xprt_switch_add_xprt_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "# add one xprt to this xprt_switch\n"); +} + +static ssize_t rpc_sysfs_xprt_switch_add_xprt_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rpc_xprt_switch *xprt_switch = + rpc_sysfs_xprt_switch_kobj_get_xprt(kobj); + struct xprt_create xprt_create_args; + struct rpc_xprt *xprt, *new; + + if (!xprt_switch) + return 0; + + xprt = rpc_xprt_switch_get_main_xprt(xprt_switch); + if (!xprt) + goto out; + + xprt_create_args.ident = xprt->xprt_class->ident; + xprt_create_args.net = xprt->xprt_net; + xprt_create_args.dstaddr = (struct sockaddr *)&xprt->addr; + xprt_create_args.addrlen = xprt->addrlen; + xprt_create_args.servername = xprt->servername; + xprt_create_args.bc_xprt = xprt->bc_xprt; + xprt_create_args.xprtsec = xprt->xprtsec; + xprt_create_args.connect_timeout = xprt->connect_timeout; + xprt_create_args.reconnect_timeout = xprt->max_reconnect_timeout; + + new = xprt_create_transport(&xprt_create_args); + if (IS_ERR_OR_NULL(new)) { + count = PTR_ERR(new); + goto out_put_xprt; + } + + rpc_xprt_switch_add_xprt(xprt_switch, new); + xprt_put(new); + +out_put_xprt: + xprt_put(xprt); +out: + xprt_switch_put(xprt_switch); + return count; +} + static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -523,8 +572,13 @@ ATTRIBUTE_GROUPS(rpc_sysfs_xprt); static struct kobj_attribute rpc_sysfs_xprt_switch_info = __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL); +static struct kobj_attribute rpc_sysfs_xprt_switch_add_xprt = + __ATTR(add_xprt, 0644, rpc_sysfs_xprt_switch_add_xprt_show, + rpc_sysfs_xprt_switch_add_xprt_store); + static struct attribute *rpc_sysfs_xprt_switch_attrs[] = { &rpc_sysfs_xprt_switch_info.attr, + &rpc_sysfs_xprt_switch_add_xprt.attr, NULL, }; ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch); diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 7e98d4dd9f10..4c5e08b0aa64 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -92,6 +92,27 @@ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, xprt_put(xprt); } +/** + * rpc_xprt_switch_get_main_xprt - Get the 'main' xprt for an xprt switch. + * @xps: pointer to struct rpc_xprt_switch. + */ +struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps) +{ + struct rpc_xprt_iter xpi; + struct rpc_xprt *xprt; + + xprt_iter_init_listall(&xpi, xps); + + xprt = xprt_iter_get_next(&xpi); + while (xprt && !xprt->main) { + xprt_put(xprt); + xprt = xprt_iter_get_next(&xpi); + } + + xprt_iter_destroy(&xpi); + return xprt; +} + static DEFINE_IDA(rpc_xprtswitch_ids); void xprt_multipath_cleanup_ids(void) -- cgit v1.2.3 From 487fae09d7e266a58a13784983cc1ef6a2076526 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Mar 2025 11:45:06 -0400 Subject: NFS: Add a mount option to make ENETUNREACH errors fatal If the NFS client was initially created in a container, and that container is torn down, there is usually no possibity to go back and destroy any NFS clients that are hung because their virtual network devices have been unlinked. Add a flag that tells the NFS client that in these circumstances, it should treat ENETDOWN and ENETUNREACH errors as fatal to the NFS client. The option defaults to being on when the mount happens from inside a net namespace that is not "init_net". Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Tested-by: Jeff Layton Acked-by: Chuck Lever --- fs/nfs/fs_context.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/nfs/super.c | 3 +++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 1cabba1231d6..13f71ca8c974 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -50,6 +50,7 @@ enum nfs_param { Opt_clientaddr, Opt_cto, Opt_alignwrite, + Opt_fatal_neterrors, Opt_fg, Opt_fscache, Opt_fscache_flag, @@ -97,6 +98,20 @@ enum nfs_param { Opt_xprtsec, }; +enum { + Opt_fatal_neterrors_default, + Opt_fatal_neterrors_enetunreach, + Opt_fatal_neterrors_none, +}; + +static const struct constant_table nfs_param_enums_fatal_neterrors[] = { + { "default", Opt_fatal_neterrors_default }, + { "ENETDOWN:ENETUNREACH", Opt_fatal_neterrors_enetunreach }, + { "ENETUNREACH:ENETDOWN", Opt_fatal_neterrors_enetunreach }, + { "none", Opt_fatal_neterrors_none }, + {} +}; + enum { Opt_local_lock_all, Opt_local_lock_flock, @@ -153,6 +168,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_string("clientaddr", Opt_clientaddr), fsparam_flag_no("cto", Opt_cto), fsparam_flag_no("alignwrite", Opt_alignwrite), + fsparam_enum("fatal_neterrors", Opt_fatal_neterrors, + nfs_param_enums_fatal_neterrors), fsparam_flag ("fg", Opt_fg), fsparam_flag_no("fsc", Opt_fscache_flag), fsparam_string("fsc", Opt_fscache), @@ -896,6 +913,25 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, goto out_of_bounds; ctx->nfs_server.max_connect = result.uint_32; break; + case Opt_fatal_neterrors: + trace_nfs_mount_assign(param->key, param->string); + switch (result.uint_32) { + case Opt_fatal_neterrors_default: + if (fc->net_ns != &init_net) + ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; + else + ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL; + break; + case Opt_fatal_neterrors_enetunreach: + ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; + break; + case Opt_fatal_neterrors_none: + ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL; + break; + default: + goto out_invalid_value; + } + break; case Opt_lookupcache: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { @@ -1675,6 +1711,9 @@ static int nfs_init_fs_context(struct fs_context *fc) ctx->xprtsec.cert_serial = TLS_NO_CERT; ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY; + if (fc->net_ns != &init_net) + ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; + fc->s_iflags |= SB_I_STABLE_WRITES; } fc->fs_private = ctx; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 96de658a7886..9eea9e62afc9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -457,6 +457,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_FORCE_RDIRPLUS, ",rdirplus=force", "" }, { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, + { NFS_MOUNT_NETUNREACH_FATAL, + ",fatal_neterrors=ENETDOWN:ENETUNREACH", + ",fatal_neterrors=none" }, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b83d16a42afc..a6ce8590eaaf 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -168,6 +168,7 @@ struct nfs_server { #define NFS_MOUNT_SHUTDOWN 0x08000000 #define NFS_MOUNT_NO_ALIGNWRITE 0x10000000 #define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 +#define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From 9827144bfb2b1baf1e78600da73dcc9e92e28415 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Mar 2025 10:50:26 -0400 Subject: NFS: Treat ENETUNREACH errors as fatal in containers Propagate the NFS_MOUNT_NETUNREACH_FATAL flag to work with the generic NFS client. If the flag is set, the client will receive ENETDOWN and ENETUNREACH errors from the RPC layer, and is expected to treat them as being fatal. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Tested-by: Jeff Layton Acked-by: Chuck Lever --- fs/nfs/client.c | 5 +++++ fs/nfs/nfs4client.c | 2 ++ fs/nfs/nfs4proc.c | 3 +++ include/linux/nfs_fs_sb.h | 1 + include/linux/sunrpc/clnt.h | 5 ++++- include/linux/sunrpc/sched.h | 1 + include/trace/events/sunrpc.h | 1 + net/sunrpc/clnt.c | 30 ++++++++++++++++++++++-------- 8 files changed, 39 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 3b0918ade53c..02c916a55020 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -546,6 +546,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, args.flags |= RPC_CLNT_CREATE_NOPING; if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_REUSEPORT; + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_NETUNREACH_FATAL; if (!IS_ERR(clp->cl_rpcclient)) return 0; @@ -709,6 +711,9 @@ static int nfs_init_server(struct nfs_server *server, if (ctx->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (ctx->flags & NFS_MOUNT_NETUNREACH_FATAL) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); + /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 83378f69b35e..8f7d40844cdc 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -233,6 +233,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); if (test_bit(NFS_CS_PNFS, &cl_init->init_flags)) __set_bit(NFS_CS_PNFS, &clp->cl_flags); + if (test_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags)) + __set_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags); /* * Set up the connection to the server before we add add to the * global list. diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 24406fc1352d..889511650ceb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -195,6 +195,9 @@ static int nfs4_map_errors(int err) return -EBUSY; case -NFS4ERR_NOT_SAME: return -ENOTSYNC; + case -ENETDOWN: + case -ENETUNREACH: + break; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a6ce8590eaaf..71319637a84e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -50,6 +50,7 @@ struct nfs_client { #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ #define NFS_CS_PNFS 9 /* - Server used for pnfs */ +#define NFS_CS_NETUNREACH_FATAL 10 /* - ENETUNREACH errors are fatal */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index fec976e58174..f8b406b0a1af 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -64,7 +64,9 @@ struct rpc_clnt { cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ cl_chatty : 1,/* be verbose */ - cl_shutdown : 1;/* rpc immediate -EIO */ + cl_shutdown : 1,/* rpc immediate -EIO */ + cl_netunreach_fatal : 1; + /* Treat ENETUNREACH errors as fatal */ struct xprtsec_parms cl_xprtsec; /* transport security policy */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ @@ -175,6 +177,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) #define RPC_CLNT_CREATE_CONNECTED (1UL << 12) +#define RPC_CLNT_CREATE_NETUNREACH_FATAL (1UL << 13) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index eac57914dcf3..ccba79ebf893 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -134,6 +134,7 @@ struct rpc_task_setup { #define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ +#define RPC_TASK_NETUNREACH_FATAL 0x0040 /* ENETUNREACH is fatal */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 851841336ee6..5d331383047b 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -343,6 +343,7 @@ TRACE_EVENT(rpc_request, { RPC_TASK_MOVEABLE, "MOVEABLE" }, \ { RPC_TASK_NULLCREDS, "NULLCREDS" }, \ { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \ + { RPC_TASK_NETUNREACH_FATAL, "NETUNREACH_FATAL"}, \ { RPC_TASK_DYNAMIC, "DYNAMIC" }, \ { RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN" }, \ { RPC_TASK_SOFT, "SOFT" }, \ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2fe88ea79a70..1cfaf93ceec1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -512,6 +512,8 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, clnt->cl_discrtry = 1; if (!(args->flags & RPC_CLNT_CREATE_QUIET)) clnt->cl_chatty = 1; + if (args->flags & RPC_CLNT_CREATE_NETUNREACH_FATAL) + clnt->cl_netunreach_fatal = 1; return clnt; } @@ -662,6 +664,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_noretranstimeo = clnt->cl_noretranstimeo; new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; + new->cl_netunreach_fatal = clnt->cl_netunreach_fatal; new->cl_principal = clnt->cl_principal; new->cl_max_connect = clnt->cl_max_connect; return new; @@ -1195,6 +1198,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; + if (clnt->cl_netunreach_fatal) + task->tk_flags |= RPC_TASK_NETUNREACH_FATAL; atomic_inc(&clnt->cl_task_count); } @@ -2102,14 +2107,17 @@ call_bind_status(struct rpc_task *task) case -EPROTONOSUPPORT: trace_rpcb_bind_version_err(task); goto retry_timeout; + case -ENETDOWN: + case -ENETUNREACH: + if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL) + break; + fallthrough; case -ECONNREFUSED: /* connection problems */ case -ECONNRESET: case -ECONNABORTED: case -ENOTCONN: case -EHOSTDOWN: - case -ENETDOWN: case -EHOSTUNREACH: - case -ENETUNREACH: case -EPIPE: trace_rpcb_unreachable_err(task); if (!RPC_IS_SOFTCONN(task)) { @@ -2191,19 +2199,22 @@ call_connect_status(struct rpc_task *task) task->tk_status = 0; switch (status) { + case -ENETDOWN: + case -ENETUNREACH: + if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL) + break; + fallthrough; case -ECONNREFUSED: case -ECONNRESET: /* A positive refusal suggests a rebind is needed. */ - if (RPC_IS_SOFTCONN(task)) - break; if (clnt->cl_autobind) { rpc_force_rebind(clnt); + if (RPC_IS_SOFTCONN(task)) + break; goto out_retry; } fallthrough; case -ECONNABORTED: - case -ENETDOWN: - case -ENETUNREACH: case -EHOSTUNREACH: case -EPIPE: case -EPROTO: @@ -2455,10 +2466,13 @@ call_status(struct rpc_task *task) trace_rpc_call_status(task); task->tk_status = 0; switch(status) { - case -EHOSTDOWN: case -ENETDOWN: - case -EHOSTUNREACH: case -ENETUNREACH: + if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL) + goto out_exit; + fallthrough; + case -EHOSTDOWN: + case -EHOSTUNREACH: case -EPERM: if (RPC_IS_SOFTCONN(task)) goto out_exit; -- cgit v1.2.3 From 8c9f0df7a82a9f3bbdab4c796d302b20a33c1cc6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Mar 2025 08:04:35 -0400 Subject: pNFS/flexfiles: Treat ENETUNREACH errors as fatal in containers Propagate the NFS_MOUNT_NETUNREACH_FATAL flag to work with the pNFS flexfiles client. In these circumstances, the client needs to treat the ENETDOWN and ENETUNREACH errors as fatal, and should abandon the attempted I/O. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Tested-by: Jeff Layton Acked-by: Chuck Lever --- fs/nfs/flexfilelayout/flexfilelayout.c | 23 +++++++++++++++++++++-- fs/nfs/nfs3client.c | 2 ++ fs/nfs/nfs4client.c | 5 +++++ include/linux/nfs4.h | 1 + 4 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 98b45b636be3..f89fdba7289d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1154,10 +1154,14 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; /* RPC connection errors */ + case -ENETDOWN: + case -ENETUNREACH: + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags)) + return -NFS4ERR_FATAL_IOERROR; + fallthrough; case -ECONNREFUSED: case -EHOSTDOWN: case -EHOSTUNREACH: - case -ENETUNREACH: case -EIO: case -ETIMEDOUT: case -EPIPE: @@ -1183,6 +1187,7 @@ reset: /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, + struct nfs_client *clp, struct pnfs_layout_segment *lseg, u32 idx) { @@ -1200,6 +1205,11 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, case -EJUKEBOX: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); goto out_retry; + case -ENETDOWN: + case -ENETUNREACH: + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags)) + return -NFS4ERR_FATAL_IOERROR; + fallthrough; default: dprintk("%s DS connection error %d\n", __func__, task->tk_status); @@ -1234,7 +1244,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: - return ff_layout_async_handle_error_v3(task, lseg, idx); + return ff_layout_async_handle_error_v3(task, clp, lseg, idx); case 4: return ff_layout_async_handle_error_v4(task, state, clp, lseg, idx); @@ -1337,6 +1347,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task, return task->tk_status; case -EAGAIN: goto out_eagain; + case -NFS4ERR_FATAL_IOERROR: + task->tk_status = -EIO; + return 0; } return 0; @@ -1507,6 +1520,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task, return task->tk_status; case -EAGAIN: return -EAGAIN; + case -NFS4ERR_FATAL_IOERROR: + task->tk_status = -EIO; + return 0; } if (hdr->res.verf->committed == NFS_FILE_SYNC || @@ -1551,6 +1567,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, case -EAGAIN: rpc_restart_call_prepare(task); return -EAGAIN; + case -NFS4ERR_FATAL_IOERROR: + task->tk_status = -EIO; + return 0; } ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb); diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index b0c8a39c2bbd..0d7310c1ee0c 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -120,6 +120,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags)) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); __set_bit(NFS_CS_DS, &cl_init.init_flags); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8f7d40844cdc..162c85a83a14 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -939,6 +939,9 @@ static int nfs4_set_client(struct nfs_server *server, __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); server->port = rpc_get_port((struct sockaddr *)addr); + if (server->flags & NFS_MOUNT_NETUNREACH_FATAL) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); + /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) @@ -1013,6 +1016,8 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags)) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); __set_bit(NFS_CS_PNFS, &cl_init.init_flags); cl_init.max_connect = NFS_MAX_TRANSPORTS; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5fa60fe441b5..d8cad844870a 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -300,6 +300,7 @@ enum nfsstat4 { /* error codes for internal client use */ #define NFS4ERR_RESET_TO_MDS 12001 #define NFS4ERR_RESET_TO_PNFS 12002 +#define NFS4ERR_FATAL_IOERROR 12003 static inline bool seqid_mutating_err(u32 err) { -- cgit v1.2.3