diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-10 14:04:32 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-10 14:04:32 -0700 |
commit | aeb6e6ac18c73ec287b3b1e2c913520699358c13 (patch) | |
tree | 526c24532c08e0757be8f4a41f519bd27bfd7b8b /fs | |
parent | f41445645ab5d172e6090d00c332c335d8dba337 (diff) | |
parent | 3fa5cbdc44de190f2c5605ba7db015ae0d26f668 (diff) | |
download | lwn-aeb6e6ac18c73ec287b3b1e2c913520699358c13.tar.gz lwn-aeb6e6ac18c73ec287b3b1e2c913520699358c13.zip |
Merge tag 'nfs-for-5.20-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Stable fixes:
- pNFS/flexfiles: Fix infinite looping when the RDMA connection
errors out
Bugfixes:
- NFS: fix port value parsing
- SUNRPC: Reinitialise the backchannel request buffers before reuse
- SUNRPC: fix expiry of auth creds
- NFSv4: Fix races in the legacy idmapper upcall
- NFS: O_DIRECT fixes from Jeff Layton
- NFSv4.1: Fix OP_SEQUENCE error handling
- SUNRPC: Fix an RPC/RDMA performance regression
- NFS: Fix case insensitive renames
- NFSv4/pnfs: Fix a use-after-free bug in open
- NFSv4.1: RECLAIM_COMPLETE must handle EACCES
Features:
- NFSv4.1: session trunking enhancements
- NFSv4.2: READ_PLUS performance optimisations
- NFS: relax the rules for rsize/wsize mount options
- NFS: don't unhash dentry during unlink/rename
- SUNRPC: Fail faster on bad verifier
- NFS/SUNRPC: Various tracing improvements"
* tag 'nfs-for-5.20-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (46 commits)
NFS: Improve readpage/writepage tracing
NFS: Improve O_DIRECT tracing
NFS: Improve write error tracing
NFS: don't unhash dentry during unlink/rename
NFSv4/pnfs: Fix a use-after-free bug in open
NFS: nfs_async_write_reschedule_io must not recurse into the writeback code
SUNRPC: Don't reuse bvec on retransmission of the request
SUNRPC: Reinitialise the backchannel request buffers before reuse
NFSv4.1: RECLAIM_COMPLETE must handle EACCES
NFSv4.1 probe offline transports for trunking on session creation
SUNRPC create a function that probes only offline transports
SUNRPC export xprt_iter_rewind function
SUNRPC restructure rpc_clnt_setup_test_and_add_xprt
NFSv4.1 remove xprt from xprt_switch if session trunking test fails
SUNRPC create an rpc function that allows xprt removal from rpc_clnt
SUNRPC enable back offline transports in trunking discovery
SUNRPC create an iterator to list only OFFLINE xprts
NFSv4.1 offline trunkable transports on DESTROY_SESSION
SUNRPC add function to offline remove trunkable transports
SUNRPC expose functions for offline remote xprt functionality
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/nfs/blocklayout/dev.c | 42 | ||||
-rw-r--r-- | fs/nfs/client.c | 13 | ||||
-rw-r--r-- | fs/nfs/dir.c | 80 | ||||
-rw-r--r-- | fs/nfs/direct.c | 50 | ||||
-rw-r--r-- | fs/nfs/file.c | 2 | ||||
-rw-r--r-- | fs/nfs/filelayout/filelayout.c | 2 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 4 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 | ||||
-rw-r--r-- | fs/nfs/fs_context.c | 26 | ||||
-rw-r--r-- | fs/nfs/internal.h | 51 | ||||
-rw-r--r-- | fs/nfs/nfs3client.c | 1 | ||||
-rw-r--r-- | fs/nfs/nfs42xdr.c | 170 | ||||
-rw-r--r-- | fs/nfs/nfs4client.c | 4 | ||||
-rw-r--r-- | fs/nfs/nfs4idmap.c | 46 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 32 | ||||
-rw-r--r-- | fs/nfs/nfstrace.h | 215 | ||||
-rw-r--r-- | fs/nfs/write.c | 58 |
17 files changed, 513 insertions, 289 deletions
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 5e56da748b2a..fea5f8821da5 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -301,18 +301,14 @@ bl_validate_designator(struct pnfs_block_volume *v) } } -/* - * Try to open the udev path for the WWN. At least on Debian the udev - * by-id path will always point to the dm-multipath device if one exists. - */ static struct block_device * -bl_open_udev_path(struct pnfs_block_volume *v) +bl_open_path(struct pnfs_block_volume *v, const char *prefix) { struct block_device *bdev; const char *devname; - devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN", - v->scsi.designator_len, v->scsi.designator); + devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN", + prefix, v->scsi.designator_len, v->scsi.designator); if (!devname) return ERR_PTR(-ENOMEM); @@ -326,28 +322,6 @@ bl_open_udev_path(struct pnfs_block_volume *v) return bdev; } -/* - * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the - * wwn- links will only point to the first discovered SCSI device there. - */ -static struct block_device * -bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v) -{ - struct block_device *bdev; - const char *devname; - - devname = kasprintf(GFP_KERNEL, - "/dev/disk/by-id/dm-uuid-mpath-%d%*phN", - v->scsi.designator_type, - v->scsi.designator_len, v->scsi.designator); - if (!devname) - return ERR_PTR(-ENOMEM); - - bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL); - kfree(devname); - return bdev; -} - static int bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) @@ -360,9 +334,15 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, if (!bl_validate_designator(v)) return -EINVAL; - bdev = bl_open_dm_mpath_udev_path(v); + /* + * Try to open the RH/Fedora specific dm-mpath udev path first, as the + * wwn- links will only point to the first discovered SCSI device there. + * On other distributions like Debian, the default SCSI by-id path will + * point to the dm-multipath device if one exists. + */ + bdev = bl_open_path(v, "dm-uuid-mpath-0x"); if (IS_ERR(bdev)) - bdev = bl_open_udev_path(v); + bdev = bl_open_path(v, "wwn-0x"); if (IS_ERR(bdev)) return PTR_ERR(bdev); d->bdev = bdev; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e828504cc396..da8da5cdbbc1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -708,9 +708,9 @@ static int nfs_init_server(struct nfs_server *server, } if (ctx->rsize) - server->rsize = nfs_block_size(ctx->rsize, NULL); + server->rsize = nfs_io_size(ctx->rsize, clp->cl_proto); if (ctx->wsize) - server->wsize = nfs_block_size(ctx->wsize, NULL); + server->wsize = nfs_io_size(ctx->wsize, clp->cl_proto); server->acregmin = ctx->acregmin * HZ; server->acregmax = ctx->acregmax * HZ; @@ -755,18 +755,19 @@ error: static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) { + struct nfs_client *clp = server->nfs_client; unsigned long max_rpc_payload, raw_max_rpc_payload; /* Work out a lot of parameters */ if (server->rsize == 0) - server->rsize = nfs_block_size(fsinfo->rtpref, NULL); + server->rsize = nfs_io_size(fsinfo->rtpref, clp->cl_proto); if (server->wsize == 0) - server->wsize = nfs_block_size(fsinfo->wtpref, NULL); + server->wsize = nfs_io_size(fsinfo->wtpref, clp->cl_proto); if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax) - server->rsize = nfs_block_size(fsinfo->rtmax, NULL); + server->rsize = nfs_io_size(fsinfo->rtmax, clp->cl_proto); if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) - server->wsize = nfs_block_size(fsinfo->wtmax, NULL); + server->wsize = nfs_io_size(fsinfo->wtmax, clp->cl_proto); raw_max_rpc_payload = rpc_max_payload(server->client); max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0c4e8dd6aa96..dbab3caa15ed 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1084,7 +1084,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, struct nfs_cache_array *array; unsigned int i; - array = kmap(desc->page); + array = kmap_local_page(desc->page); for (i = desc->cache_entry_index; i < array->size; i++) { struct nfs_cache_array_entry *ent; @@ -1110,7 +1110,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, if (array->page_is_eof) desc->eof = !desc->eob; - kunmap(desc->page); + kunmap_local(array); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n", (unsigned long long)desc->dir_cookie); } @@ -1739,6 +1739,10 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, goto out_bad; } + if ((flags & LOOKUP_RENAME_TARGET) && d_count(dentry) < 2 && + nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) + goto out_bad; + if (nfs_verifier_is_delegated(dentry)) return nfs_lookup_revalidate_delegated(dir, dentry, inode); @@ -1778,6 +1782,8 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, int ret; if (flags & LOOKUP_RCU) { + if (dentry->d_fsdata == NFS_FSDATA_BLOCKED) + return -ECHILD; parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) @@ -1786,6 +1792,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; } else { + /* Wait for unlink to complete */ + wait_var_event(&dentry->d_fsdata, + dentry->d_fsdata != NFS_FSDATA_BLOCKED); parent = dget_parent(dentry); ret = reval(d_inode(parent), dentry, flags); dput(parent); @@ -2454,7 +2463,6 @@ out: int nfs_unlink(struct inode *dir, struct dentry *dentry) { int error; - int need_rehash = 0; dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id, dir->i_ino, dentry); @@ -2469,15 +2477,25 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) error = nfs_sillyrename(dir, dentry); goto out; } - if (!d_unhashed(dentry)) { - __d_drop(dentry); - need_rehash = 1; - } + /* We must prevent any concurrent open until the unlink + * completes. ->d_revalidate will wait for ->d_fsdata + * to clear. We set it here to ensure no lookup succeeds until + * the unlink is complete on the server. + */ + error = -ETXTBSY; + if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) || + WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) + goto out; + if (dentry->d_fsdata) + /* old devname */ + kfree(dentry->d_fsdata); + dentry->d_fsdata = NFS_FSDATA_BLOCKED; + spin_unlock(&dentry->d_lock); error = nfs_safe_remove(dentry); nfs_dentry_remove_handle_error(dir, dentry, error); - if (need_rehash) - d_rehash(dentry); + dentry->d_fsdata = NULL; + wake_up_var(&dentry->d_fsdata); out: trace_nfs_unlink_exit(dir, dentry, error); return error; @@ -2584,6 +2602,15 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) } EXPORT_SYMBOL_GPL(nfs_link); +static void +nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) +{ + struct dentry *new_dentry = data->new_dentry; + + new_dentry->d_fsdata = NULL; + wake_up_var(&new_dentry->d_fsdata); +} + /* * RENAME * FIXME: Some nfsds, like the Linux user space nfsd, may generate a @@ -2614,8 +2641,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct dentry *dentry = NULL, *rehash = NULL; + struct dentry *dentry = NULL; struct rpc_task *task; + bool must_unblock = false; int error = -EBUSY; if (flags) @@ -2633,18 +2661,27 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, * the new target. */ if (new_inode && !S_ISDIR(new_inode->i_mode)) { - /* - * To prevent any new references to the target during the - * rename, we unhash the dentry in advance. + /* We must prevent any concurrent open until the unlink + * completes. ->d_revalidate will wait for ->d_fsdata + * to clear. We set it here to ensure no lookup succeeds until + * the unlink is complete on the server. */ - if (!d_unhashed(new_dentry)) { - d_drop(new_dentry); - rehash = new_dentry; + error = -ETXTBSY; + if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) || + WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED)) + goto out; + if (new_dentry->d_fsdata) { + /* old devname */ + kfree(new_dentry->d_fsdata); + new_dentry->d_fsdata = NULL; } + spin_lock(&new_dentry->d_lock); if (d_count(new_dentry) > 2) { int err; + spin_unlock(&new_dentry->d_lock); + /* copy the target dentry's name */ dentry = d_alloc(new_dentry->d_parent, &new_dentry->d_name); @@ -2657,14 +2694,19 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, goto out; new_dentry = dentry; - rehash = NULL; new_inode = NULL; + } else { + new_dentry->d_fsdata = NFS_FSDATA_BLOCKED; + must_unblock = true; + spin_unlock(&new_dentry->d_lock); } + } if (S_ISREG(old_inode->i_mode)) nfs_sync_inode(old_inode); - task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); + task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, + must_unblock ? nfs_unblock_rename : NULL); if (IS_ERR(task)) { error = PTR_ERR(task); goto out; @@ -2688,8 +2730,6 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, spin_unlock(&old_inode->i_lock); } out: - if (rehash) - d_rehash(rehash); trace_nfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, error); if (!error) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index c275c83f0aef..1707f46b1335 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -60,44 +60,12 @@ #include "iostat.h" #include "pnfs.h" #include "fscache.h" +#include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_VFS static struct kmem_cache *nfs_direct_cachep; -struct nfs_direct_req { - struct kref kref; /* release manager */ - - /* I/O parameters */ - struct nfs_open_context *ctx; /* file open context info */ - struct nfs_lock_context *l_ctx; /* Lock context info */ - struct kiocb * iocb; /* controlling i/o request */ - struct inode * inode; /* target file of i/o */ - - /* completion state */ - atomic_t io_count; /* i/os we're waiting for */ - spinlock_t lock; /* protect completion state */ - - loff_t io_start; /* Start offset for I/O */ - ssize_t count, /* bytes actually processed */ - max_count, /* max expected count */ - bytes_left, /* bytes left to be sent */ - error; /* any reported error */ - struct completion completion; /* wait for i/o completion */ - - /* commit state */ - struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */ - struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ - struct work_struct work; - int flags; - /* for write */ -#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ -#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ - /* for read */ -#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ -#define NFS_ODIRECT_DONE INT_MAX /* write verification failed */ -}; - static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; static void nfs_direct_write_complete(struct nfs_direct_req *dreq); @@ -594,14 +562,17 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) struct nfs_page *req; int status = data->task.tk_status; + trace_nfs_direct_commit_complete(dreq); + if (status < 0) { /* Errors in commit are fatal */ dreq->error = status; dreq->max_count = 0; dreq->count = 0; dreq->flags = NFS_ODIRECT_DONE; - } else if (dreq->flags == NFS_ODIRECT_DONE) + } else { status = dreq->error; + } nfs_init_cinfo_from_dreq(&cinfo, dreq); @@ -630,6 +601,8 @@ static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, { struct nfs_direct_req *dreq = cinfo->dreq; + trace_nfs_direct_resched_write(dreq); + spin_lock(&dreq->lock); if (dreq->flags != NFS_ODIRECT_DONE) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; @@ -694,6 +667,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) static void nfs_direct_write_complete(struct nfs_direct_req *dreq) { + trace_nfs_direct_write_complete(dreq); queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */ } @@ -704,6 +678,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); int flags = NFS_ODIRECT_DONE; + trace_nfs_direct_write_completion(dreq); + nfs_init_cinfo_from_dreq(&cinfo, dreq); spin_lock(&dreq->lock); @@ -713,7 +689,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } nfs_direct_count_bytes(dreq, hdr); - if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) { + if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) { if (!dreq->flags) dreq->flags = NFS_ODIRECT_DO_COMMIT; flags = dreq->flags; @@ -758,6 +734,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; + trace_nfs_direct_write_reschedule_io(dreq); + spin_lock(&dreq->lock); if (dreq->error == 0) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; @@ -798,6 +776,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); + trace_nfs_direct_write_schedule_iovec(dreq); + nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 549baed76351..d2bcd4834c0e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -661,8 +661,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) result = filemap_fdatawait_range(file->f_mapping, iocb->ki_pos - written, iocb->ki_pos - 1); - if (result < 0) - goto out; } result = generic_write_sync(iocb, written); if (result < 0) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 2b2661582bbe..ad34a33b0737 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -181,6 +181,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -EIO: case -ETIMEDOUT: case -EPIPE: + case -EPROTO: + case -ENODEV: dprintk("%s DS connection error %d\n", __func__, task->tk_status); nfs4_mark_deviceid_unavailable(devid); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 604be402ae13..7d285561e59f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1131,6 +1131,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, case -EIO: case -ETIMEDOUT: case -EPIPE: + case -EPROTO: + case -ENODEV: dprintk("%s DS connection error %d\n", __func__, task->tk_status); nfs4_delete_deviceid(devid->ld, devid->nfs_client, @@ -1236,6 +1238,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -ENOBUFS: case -EPIPE: case -EPERM: + case -EPROTO: + case -ENODEV: *op_status = status = NFS4ERR_NXIO; break; case -EACCES: diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index bfa7202ca7be..e028f5a0ef5f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -113,8 +113,10 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out_err_drain_dsaddrs; ds_versions[i].version = be32_to_cpup(p++); ds_versions[i].minor_version = be32_to_cpup(p++); - ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); - ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); + ds_versions[i].rsize = nfs_io_size(be32_to_cpup(p++), + server->nfs_client->cl_proto); + ds_versions[i].wsize = nfs_io_size(be32_to_cpup(p++), + server->nfs_client->cl_proto); ds_versions[i].tightly_coupled = be32_to_cpup(p); if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 9a16897e8dc6..4da701fd1424 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -21,6 +21,8 @@ #include "nfs.h" #include "internal.h" +#include "nfstrace.h" + #define NFSDBG_FACILITY NFSDBG_MOUNT #if IS_ENABLED(CONFIG_NFS_V3) @@ -284,7 +286,6 @@ static int nfs_verify_server_address(struct sockaddr *addr) } } - dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); return 0; } @@ -378,7 +379,7 @@ static int nfs_parse_security_flavors(struct fs_context *fc, char *string = param->string, *p; int ret; - dfprintk(MOUNT, "NFS: parsing %s=%s option\n", param->key, param->string); + trace_nfs_mount_assign(param->key, string); while ((p = strsep(&string, ":")) != NULL) { if (!*p) @@ -480,11 +481,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, unsigned int len; int ret, opt; - dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", param->key); + trace_nfs_mount_option(param); opt = fs_parse(fc, nfs_fs_parameters, param, &result); if (opt < 0) - return ctx->sloppy ? 1 : opt; + return (opt == -ENOPARAM && ctx->sloppy) ? 1 : opt; if (fc->security) ctx->has_sec_mnt_opts = 1; @@ -683,6 +684,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, return ret; break; case Opt_vers: + trace_nfs_mount_assign(param->key, param->string); ret = nfs_parse_version_string(fc, param->string); if (ret < 0) return ret; @@ -694,6 +696,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_proto: + trace_nfs_mount_assign(param->key, param->string); protofamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case Opt_xprt_udp6: @@ -729,6 +732,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_mountproto: + trace_nfs_mount_assign(param->key, param->string); mountfamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case Opt_xprt_udp6: @@ -751,6 +755,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_addr: + trace_nfs_mount_assign(param->key, param->string); len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->nfs_server.address, sizeof(ctx->nfs_server._address)); @@ -759,16 +764,19 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->nfs_server.addrlen = len; break; case Opt_clientaddr: + trace_nfs_mount_assign(param->key, param->string); kfree(ctx->client_address); ctx->client_address = param->string; param->string = NULL; break; case Opt_mounthost: + trace_nfs_mount_assign(param->key, param->string); kfree(ctx->mount_server.hostname); ctx->mount_server.hostname = param->string; param->string = NULL; break; case Opt_mountaddr: + trace_nfs_mount_assign(param->key, param->string); len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->mount_server.address, sizeof(ctx->mount_server._address)); @@ -846,7 +854,6 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, */ case Opt_sloppy: ctx->sloppy = true; - dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); break; } @@ -879,10 +886,8 @@ static int nfs_parse_source(struct fs_context *fc, size_t len; const char *end; - if (unlikely(!dev_name || !*dev_name)) { - dfprintk(MOUNT, "NFS: device name not specified\n"); + if (unlikely(!dev_name || !*dev_name)) return -EINVAL; - } /* Is the host name protected with square brakcets? */ if (*dev_name == '[') { @@ -922,7 +927,7 @@ static int nfs_parse_source(struct fs_context *fc, if (!ctx->nfs_server.export_path) goto out_nomem; - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", ctx->nfs_server.export_path); + trace_nfs_mount_path(ctx->nfs_server.export_path); return 0; out_bad_devname: @@ -1116,7 +1121,6 @@ out_no_sec: return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS"); out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to handle mount options"); return -ENOMEM; out_no_address: @@ -1248,7 +1252,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc, if (IS_ERR(c)) return PTR_ERR(c); ctx->nfs_server.export_path = c; - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); + trace_nfs_mount_path(c); c = strndup_user(data->client_addr.data, 16); if (IS_ERR(c)) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 437ebe544aaf..27c720d71b4e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -707,6 +707,24 @@ unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) } /* + * Compute and set NFS server rsize / wsize + */ +static inline +unsigned long nfs_io_size(unsigned long iosize, enum xprt_transports proto) +{ + if (iosize < NFS_MIN_FILE_IO_SIZE) + iosize = NFS_DEF_FILE_IO_SIZE; + else if (iosize >= NFS_MAX_FILE_IO_SIZE) + iosize = NFS_MAX_FILE_IO_SIZE; + else + iosize = iosize & PAGE_MASK; + + if (proto == XPRT_TRANSPORT_UDP) + return nfs_block_bits(iosize, NULL); + return iosize; +} + +/* * Determine the maximum file size for a superblock */ static inline @@ -861,3 +879,36 @@ static inline void nfs_set_port(struct sockaddr *sap, int *port, rpc_set_port(sap, *port); } + +struct nfs_direct_req { + struct kref kref; /* release manager */ + + /* I/O parameters */ + struct nfs_open_context *ctx; /* file open context info */ + struct nfs_lock_context *l_ctx; /* Lock context info */ + struct kiocb * iocb; /* controlling i/o request */ + struct inode * inode; /* target file of i/o */ + + /* completion state */ + atomic_t io_count; /* i/os we're waiting for */ + spinlock_t lock; /* protect completion state */ + + loff_t io_start; /* Start offset for I/O */ + ssize_t count, /* bytes actually processed */ + max_count, /* max expected count */ + bytes_left, /* bytes left to be sent */ + error; /* any reported error */ + struct completion completion; /* wait for i/o completion */ + + /* commit state */ + struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */ + struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ + struct work_struct work; + int flags; + /* for write */ +#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ +#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ + /* for read */ +#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ +#define NFS_ODIRECT_DONE INT_MAX /* write verification failed */ +}; diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 5601e47360c2..b49359afac88 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -108,7 +108,6 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); - __set_bit(NFS_CS_NOPING, &cl_init.init_flags); __set_bit(NFS_CS_DS, &cl_init.init_flags); /* Use the MDS nfs_client cl_ipaddr. */ diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 271e5f92ed01..b56f05113d36 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1025,82 +1025,95 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re return decode_op_hdr(xdr, OP_DEALLOCATE); } -static int decode_read_plus_data(struct xdr_stream *xdr, - struct nfs_pgio_args *args, - struct nfs_pgio_res *res) -{ - uint32_t count, recvd; +struct read_plus_segment { + enum data_content4 type; uint64_t offset; - __be32 *p; - - p = xdr_inline_decode(xdr, 8 + 4); - if (!p) - return 1; + union { + struct { + uint64_t length; + } hole; + + struct { + uint32_t length; + unsigned int from; + } data; + }; +}; - p = xdr_decode_hyper(p, &offset); - count = be32_to_cpup(p); - recvd = xdr_align_data(xdr, res->count, xdr_align_size(count)); - if (recvd > count) - recvd = count; - if (res->count + recvd > args->count) { - if (args->count > res->count) - res->count += args->count - res->count; - return 1; - } - res->count += recvd; - if (count > recvd) - return 1; - return 0; +static inline uint64_t read_plus_segment_length(struct read_plus_segment *seg) +{ + return seg->type == NFS4_CONTENT_DATA ? seg->data.length : seg->hole.length; } -static int decode_read_plus_hole(struct xdr_stream *xdr, - struct nfs_pgio_args *args, - struct nfs_pgio_res *res, uint32_t *eof) +static int decode_read_plus_segment(struct xdr_stream *xdr, + struct read_plus_segment *seg) { - uint64_t offset, length, recvd; __be32 *p; - p = xdr_inline_decode(xdr, 8 + 8); + p = xdr_inline_decode(xdr, 4); if (!p) - return 1; - - p = xdr_decode_hyper(p, &offset); - p = xdr_decode_hyper(p, &length); - if (offset != args->offset + res->count) { - /* Server returned an out-of-sequence extent */ - if (offset > args->offset + res->count || - offset + length < args->offset + res->count) { - dprintk("NFS: server returned out of sequence extent: " - "offset/size = %llu/%llu != expected %llu\n", - (unsigned long long)offset, - (unsigned long long)length, - (unsigned long long)(args->offset + - res->count)); - return 1; - } - length -= args->offset + res->count - offset; - } - if (length + res->count > args->count) { - *eof = 0; - if (unlikely(res->count >= args->count)) - return 1; - length = args->count - res->count; - } - recvd = xdr_expand_hole(xdr, res->count, length); - res->count += recvd; + return -EIO; + seg->type = be32_to_cpup(p++); + + p = xdr_inline_decode(xdr, seg->type == NFS4_CONTENT_DATA ? 12 : 16); + if (!p) + return -EIO; + p = xdr_decode_hyper(p, &seg->offset); + + if (seg->type == NFS4_CONTENT_DATA) { + struct xdr_buf buf; + uint32_t len = be32_to_cpup(p); + + seg->data.length = len; + seg->data.from = xdr_stream_pos(xdr); - if (recvd < length) - return 1; + if (!xdr_stream_subsegment(xdr, &buf, xdr_align_size(len))) + return -EIO; + } else if (seg->type == NFS4_CONTENT_HOLE) { + xdr_decode_hyper(p, &seg->hole.length); + } else + return -EINVAL; return 0; } +static int process_read_plus_segment(struct xdr_stream *xdr, + struct nfs_pgio_args *args, + struct nfs_pgio_res *res, + struct read_plus_segment *seg) +{ + unsigned long offset = seg->offset; + unsigned long length = read_plus_segment_length(seg); + unsigned int bufpos; + + if (offset + length < args->offset) + return 0; + else if (offset > args->offset + args->count) { + res->eof = 0; + return 0; + } else if (offset < args->offset) { + length -= (args->offset - offset); + offset = args->offset; + } else if (offset + length > args->offset + args->count) { + length = (args->offset + args->count) - offset; + res->eof = 0; + } + + bufpos = xdr->buf->head[0].iov_len + (offset - args->offset); + if (seg->type == NFS4_CONTENT_HOLE) + return xdr_stream_zero(xdr, bufpos, length); + else + return xdr_stream_move_subsegment(xdr, seg->data.from, bufpos, length); +} + static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) { struct nfs_pgio_header *hdr = container_of(res, struct nfs_pgio_header, res); struct nfs_pgio_args *args = &hdr->args; - uint32_t eof, segments, type; + uint32_t segments; + struct read_plus_segment *segs; int status, i; + char scratch_buf[16]; __be32 *p; status = decode_op_hdr(xdr, OP_READ_PLUS); @@ -1112,38 +1125,31 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) return -EIO; res->count = 0; - eof = be32_to_cpup(p++); + res->eof = be32_to_cpup(p++); segments = be32_to_cpup(p++); if (segments == 0) - goto out; - - for (i = 0; i < segments; i++) { - p = xdr_inline_decode(xdr, 4); - if (!p) - goto early_out; + return status; - type = be32_to_cpup(p++); - if (type == NFS4_CONTENT_DATA) - status = decode_read_plus_data(xdr, args, res); - else if (type == NFS4_CONTENT_HOLE) - status = decode_read_plus_hole(xdr, args, res, &eof); - else - return -EINVAL; + segs = kmalloc_array(segments, sizeof(*segs), GFP_KERNEL); + if (!segs) + return -ENOMEM; + xdr_set_scratch_buffer(xdr, &scratch_buf, 32); + status = -EIO; + for (i = 0; i < segments; i++) { + status = decode_read_plus_segment(xdr, &segs[i]); if (status < 0) - return status; - if (status > 0) - goto early_out; + goto out; } + xdr_set_pagelen(xdr, xdr_align_size(args->count)); + for (i = segments; i > 0; i--) + res->count += process_read_plus_segment(xdr, args, res, &segs[i-1]); + status = 0; + out: - res->eof = eof; - return 0; -early_out: - if (unlikely(!i)) - return -EIO; - res->eof = 0; - return 0; + kfree(segs); + return status; } static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 47a6cf892c95..3c5678aec006 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1161,9 +1161,9 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) return error; if (ctx->rsize) - server->rsize = nfs_block_size(ctx->rsize, NULL); + server->rsize = nfs_io_size(ctx->rsize, server->nfs_client->cl_proto); if (ctx->wsize) - server->wsize = nfs_block_size(ctx->wsize, NULL); + server->wsize = nfs_io_size(ctx->wsize, server->nfs_client->cl_proto); server->acregmin = ctx->acregmin * HZ; server->acregmax = ctx->acregmax * HZ; diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index f331866dd418..ec6afd3c4bca 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -561,22 +561,20 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, return true; } -static void -nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) +static void nfs_idmap_complete_pipe_upcall(struct idmap_legacy_upcalldata *data, + int ret) { - struct key *authkey = idmap->idmap_upcall_data->authkey; - - kfree(idmap->idmap_upcall_data); - idmap->idmap_upcall_data = NULL; - complete_request_key(authkey, ret); - key_put(authkey); + complete_request_key(data->authkey, ret); + key_put(data->authkey); + kfree(data); } -static void -nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret) +static void nfs_idmap_abort_pipe_upcall(struct idmap *idmap, + struct idmap_legacy_upcalldata *data, + int ret) { - if (idmap->idmap_upcall_data != NULL) - nfs_idmap_complete_pipe_upcall_locked(idmap, ret); + if (cmpxchg(&idmap->idmap_upcall_data, data, NULL) == data) + nfs_idmap_complete_pipe_upcall(data, ret); } static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) @@ -613,7 +611,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) ret = rpc_queue_upcall(idmap->idmap_pipe, msg); if (ret < 0) - nfs_idmap_abort_pipe_upcall(idmap, ret); + nfs_idmap_abort_pipe_upcall(idmap, data, ret); return ret; out2: @@ -669,6 +667,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) struct request_key_auth *rka; struct rpc_inode *rpci = RPC_I(file_inode(filp)); struct idmap *idmap = (struct idmap *)rpci->private; + struct idmap_legacy_upcalldata *data; struct key *authkey; struct idmap_msg im; size_t namelen_in; @@ -678,10 +677,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) * will have been woken up and someone else may now have used * idmap_key_cons - so after this point we may no longer touch it. */ - if (idmap->idmap_upcall_data == NULL) + data = xchg(&idmap->idmap_upcall_data, NULL); + if (data == NULL) goto out_noupcall; - authkey = idmap->idmap_upcall_data->authkey; + authkey = data->authkey; rka = get_request_key_auth(authkey); if (mlen != sizeof(im)) { @@ -703,18 +703,17 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { ret = -EINVAL; goto out; -} + } - ret = nfs_idmap_read_and_verify_message(&im, - &idmap->idmap_upcall_data->idmap_msg, - rka->target_key, authkey); + ret = nfs_idmap_read_and_verify_message(&im, &data->idmap_msg, + rka->target_key, authkey); if (ret >= 0) { key_set_timeout(rka->target_key, nfs_idmap_cache_timeout); ret = mlen; } out: - nfs_idmap_complete_pipe_upcall_locked(idmap, ret); + nfs_idmap_complete_pipe_upcall(data, ret); out_noupcall: return ret; } @@ -728,7 +727,7 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) struct idmap *idmap = data->idmap; if (msg->errno) - nfs_idmap_abort_pipe_upcall(idmap, msg->errno); + nfs_idmap_abort_pipe_upcall(idmap, data, msg->errno); } static void @@ -736,8 +735,11 @@ idmap_release_pipe(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); struct idmap *idmap = (struct idmap *)rpci->private; + struct idmap_legacy_upcalldata *data; - nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); + data = xchg(&idmap->idmap_upcall_data, NULL); + if (data) + nfs_idmap_complete_pipe_upcall(data, -EPIPE); } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bb0e84a46d61..3ed14a2a84a4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -784,10 +784,9 @@ static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) slot->seq_nr_highest_sent = seqnr; } -static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, - u32 seqnr) +static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, u32 seqnr) { - slot->seq_nr_highest_sent = seqnr; + nfs4_slot_sequence_record_sent(slot, seqnr); slot->seq_nr_last_acked = seqnr; } @@ -854,7 +853,6 @@ static int nfs41_sequence_process(struct rpc_task *task, __func__, slot->slot_nr, slot->seq_nr); - nfs4_slot_sequence_acked(slot, slot->seq_nr); goto out_retry; case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_SEQ_FALSE_RETRY: @@ -3098,12 +3096,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, } out: - if (opendata->lgp) { - nfs4_lgopen_release(opendata->lgp); - opendata->lgp = NULL; - } - if (!opendata->cancelled) + if (!opendata->cancelled) { + if (opendata->lgp) { + nfs4_lgopen_release(opendata->lgp); + opendata->lgp = NULL; + } nfs4_sequence_free_slot(&opendata->o_res.seq_res); + } return ret; } @@ -8924,6 +8923,9 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, if (status == 0) rpc_clnt_xprt_switch_add_xprt(clnt, xprt); + else if (rpc_clnt_xprt_switch_has_addr(clnt, + (struct sockaddr *)&xprt->addr)) + rpc_clnt_xprt_switch_remove_xprt(clnt, xprt); rpc_put_task(task); } @@ -9248,6 +9250,13 @@ int nfs4_proc_create_session(struct nfs_client *clp, const struct cred *cred) int status; unsigned *ptr; struct nfs4_session *session = clp->cl_session; + struct nfs4_add_xprt_data xprtdata = { + .clp = clp, + }; + struct rpc_add_xprt_test rpcdata = { + .add_xprt_test = clp->cl_mvops->session_trunk, + .data = &xprtdata, + }; dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); @@ -9264,6 +9273,7 @@ int nfs4_proc_create_session(struct nfs_client *clp, const struct cred *cred) ptr = (unsigned *)&session->sess_id.data[0]; dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); + rpc_clnt_probe_trunked_xprts(clp->cl_rpcclient, &rpcdata); out: return status; } @@ -9293,6 +9303,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, if (status) dprintk("NFS: Got error %d from the server on DESTROY_SESSION. " "Session has been destroyed regardless...\n", status); + rpc_clnt_manage_trunked_xprts(session->clp->cl_rpcclient); return status; } @@ -9477,6 +9488,9 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf rpc_delay(task, NFS4_POLL_RETRY_MAX); fallthrough; case -NFS4ERR_RETRY_UNCACHED_REP: + case -EACCES: + dprintk("%s: failed to reclaim complete error %d for server %s, retrying\n", + __func__, task->tk_status, clp->cl_hostname); return -EAGAIN; case -NFS4ERR_BADSESSION: case -NFS4ERR_DEADSESSION: diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 012bd7339862..8c6cc58679ff 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -1137,7 +1137,7 @@ TRACE_EVENT(nfs_readpage_done, __field(u32, arg_count) __field(u32, res_count) __field(bool, eof) - __field(int, status) + __field(int, error) ), TP_fast_assign( @@ -1146,7 +1146,7 @@ TRACE_EVENT(nfs_readpage_done, const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; - __entry->status = task->tk_status; + __entry->error = task->tk_status; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; @@ -1157,14 +1157,13 @@ TRACE_EVENT(nfs_readpage_done, ), TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%u res=%u status=%d%s", + "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%u res=%u%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, - __entry->res_count, __entry->status, - __entry->eof ? " eof" : "" + __entry->res_count, __entry->eof ? " eof" : "" ) ); @@ -1184,7 +1183,7 @@ TRACE_EVENT(nfs_readpage_short, __field(u32, arg_count) __field(u32, res_count) __field(bool, eof) - __field(int, status) + __field(int, error) ), TP_fast_assign( @@ -1193,7 +1192,7 @@ TRACE_EVENT(nfs_readpage_short, const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; - __entry->status = task->tk_status; + __entry->error = task->tk_status; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; @@ -1204,14 +1203,13 @@ TRACE_EVENT(nfs_readpage_short, ), TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%u res=%u status=%d%s", + "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%u res=%u%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, - __entry->res_count, __entry->status, - __entry->eof ? " eof" : "" + __entry->res_count, __entry->eof ? " eof" : "" ) ); @@ -1323,7 +1321,7 @@ TRACE_EVENT(nfs_pgio_error, __field(u32, arg_count) __field(u32, res_count) __field(loff_t, pos) - __field(int, status) + __field(int, error) ), TP_fast_assign( @@ -1332,7 +1330,7 @@ TRACE_EVENT(nfs_pgio_error, const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; - __entry->status = error; + __entry->error = error; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; @@ -1341,12 +1339,12 @@ TRACE_EVENT(nfs_pgio_error, __entry->fhandle = nfs_fhandle_hash(fh); ), - TP_printk("fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%u res=%u pos=%llu status=%d", + TP_printk("error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%u res=%u pos=%llu", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, - __entry->pos, __entry->status + __entry->pos ) ); @@ -1406,7 +1404,7 @@ TRACE_EVENT(nfs_writeback_done, __field(loff_t, offset) __field(u32, arg_count) __field(u32, res_count) - __field(int, status) + __field(int, error) __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), @@ -1418,7 +1416,7 @@ TRACE_EVENT(nfs_writeback_done, hdr->args.fh : &nfsi->fh; const struct nfs_writeverf *verf = hdr->res.verf; - __entry->status = task->tk_status; + __entry->error = task->tk_status; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; @@ -1432,14 +1430,14 @@ TRACE_EVENT(nfs_writeback_done, ), TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%u res=%u status=%d stable=%s " - "verifier=%s", + "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%u res=%u stable=%s " + "verifier=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, - __entry->res_count, __entry->status, + __entry->res_count, show_nfs_stable_how(__entry->stable), show_nfs4_verifier(__entry->verifier) ) @@ -1447,44 +1445,50 @@ TRACE_EVENT(nfs_writeback_done, DECLARE_EVENT_CLASS(nfs_page_error_class, TP_PROTO( + const struct inode *inode, const struct nfs_page *req, int error ), - TP_ARGS(req, error), + TP_ARGS(inode, req, error), TP_STRUCT__entry( - __field(const void *, req) - __field(pgoff_t, index) - __field(unsigned int, offset) - __field(unsigned int, pgbase) - __field(unsigned int, bytes) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(unsigned int, count) __field(int, error) ), TP_fast_assign( - __entry->req = req; - __entry->index = req->wb_index; - __entry->offset = req->wb_offset; - __entry->pgbase = req->wb_pgbase; - __entry->bytes = req->wb_bytes; + const struct nfs_inode *nfsi = NFS_I(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->offset = req_offset(req); + __entry->count = req->wb_bytes; __entry->error = error; ), TP_printk( - "req=%p index=%lu offset=%u pgbase=%u bytes=%u error=%d", - __entry->req, __entry->index, __entry->offset, - __entry->pgbase, __entry->bytes, __entry->error + "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%u", __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->offset, + __entry->count ) ); #define DEFINE_NFS_PAGEERR_EVENT(name) \ DEFINE_EVENT(nfs_page_error_class, name, \ TP_PROTO( \ + const struct inode *inode, \ const struct nfs_page *req, \ int error \ ), \ - TP_ARGS(req, error)) + TP_ARGS(inode, req, error)) DEFINE_NFS_PAGEERR_EVENT(nfs_write_error); DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error); @@ -1541,7 +1545,7 @@ TRACE_EVENT(nfs_commit_done, __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) - __field(int, status) + __field(int, error) __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), @@ -1553,7 +1557,7 @@ TRACE_EVENT(nfs_commit_done, data->args.fh : &nfsi->fh; const struct nfs_writeverf *verf = data->res.verf; - __entry->status = task->tk_status; + __entry->error = task->tk_status; __entry->offset = data->args.offset; __entry->stable = verf->committed; memcpy(__entry->verifier, @@ -1565,17 +1569,83 @@ TRACE_EVENT(nfs_commit_done, ), TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d stable=%s verifier=%s", + "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld stable=%s verifier=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - (long long)__entry->offset, __entry->status, + (long long)__entry->offset, show_nfs_stable_how(__entry->stable), show_nfs4_verifier(__entry->verifier) ) ); +#define nfs_show_direct_req_flags(v) \ + __print_flags(v, "|", \ + { NFS_ODIRECT_DO_COMMIT, "DO_COMMIT" }, \ + { NFS_ODIRECT_RESCHED_WRITES, "RESCHED_WRITES" }, \ + { NFS_ODIRECT_SHOULD_DIRTY, "SHOULD DIRTY" }, \ + { NFS_ODIRECT_DONE, "DONE" } ) + +DECLARE_EVENT_CLASS(nfs_direct_req_class, + TP_PROTO( + const struct nfs_direct_req *dreq + ), + + TP_ARGS(dreq), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, fileid) + __field(u32, fhandle) + __field(loff_t, offset) + __field(ssize_t, count) + __field(ssize_t, bytes_left) + __field(ssize_t, error) + __field(int, flags) + ), + + TP_fast_assign( + const struct inode *inode = dreq->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = &nfsi->fh; + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); + __entry->offset = dreq->io_start; + __entry->count = dreq->count; + __entry->bytes_left = dreq->bytes_left; + __entry->error = dreq->error; + __entry->flags = dreq->flags; + ), + + TP_printk( + "error=%zd fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%zd bytes_left=%zd flags=%s", + __entry->error, MAJOR(__entry->dev), + MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->offset, + __entry->count, __entry->bytes_left, + nfs_show_direct_req_flags(__entry->flags) + ) +); + +#define DEFINE_NFS_DIRECT_REQ_EVENT(name) \ + DEFINE_EVENT(nfs_direct_req_class, name, \ + TP_PROTO( \ + const struct nfs_direct_req *dreq \ + ), \ + TP_ARGS(dreq)) + +DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_commit_complete); +DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_resched_write); +DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_complete); +DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_completion); +DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_schedule_iovec); +DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_reschedule_io); + TRACE_EVENT(nfs_fh_to_dentry, TP_PROTO( const struct super_block *sb, @@ -1609,6 +1679,65 @@ TRACE_EVENT(nfs_fh_to_dentry, ) ); +TRACE_EVENT(nfs_mount_assign, + TP_PROTO( + const char *option, + const char *value + ), + + TP_ARGS(option, value), + + TP_STRUCT__entry( + __string(option, option) + __string(value, value) + ), + + TP_fast_assign( + __assign_str(option, option); + __assign_str(value, value); + ), + + TP_printk("option %s=%s", + __get_str(option), __get_str(value) + ) +); + +TRACE_EVENT(nfs_mount_option, + TP_PROTO( + const struct fs_parameter *param + ), + + TP_ARGS(param), + + TP_STRUCT__entry( + __string(option, param->key) + ), + + TP_fast_assign( + __assign_str(option, param->key); + ), + + TP_printk("option %s", __get_str(option)) +); + +TRACE_EVENT(nfs_mount_path, + TP_PROTO( + const char *path + ), + + TP_ARGS(path), + + TP_STRUCT__entry( + __string(path, path) + ), + + TP_fast_assign( + __assign_str(path, path); + ), + + TP_printk("path='%s'", __get_str(path)) +); + DECLARE_EVENT_CLASS(nfs_xdr_event, TP_PROTO( const struct xdr_stream *xdr, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 69569696dde0..51a7e202d6e5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -592,7 +592,8 @@ nfs_lock_and_join_requests(struct page *page) static void nfs_write_error(struct nfs_page *req, int error) { - trace_nfs_write_error(req, error); + trace_nfs_write_error(page_file_mapping(req->wb_page)->host, req, + error); nfs_mapping_set_error(req->wb_page, error); nfs_inode_remove_request(req); nfs_end_page_writeback(req); @@ -1000,7 +1001,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { - trace_nfs_comp_error(req, hdr->error); + trace_nfs_comp_error(hdr->inode, req, hdr->error); nfs_mapping_set_error(req->wb_page, hdr->error); goto remove_req; } @@ -1444,8 +1445,6 @@ static void nfs_async_write_error(struct list_head *head, int error) static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr) { nfs_async_write_error(&hdr->pages, 0); - filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset, - hdr->args.offset + hdr->args.count - 1); } static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { @@ -1576,25 +1575,37 @@ static int nfs_writeback_done(struct rpc_task *task, nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); trace_nfs_writeback_done(task, hdr); - if (hdr->res.verf->committed < hdr->args.stable && - task->tk_status >= 0) { - /* We tried a write call, but the server did not - * commit data to stable storage even though we - * requested it. - * Note: There is a known bug in Tru64 < 5.0 in which - * the server reports NFS_DATA_SYNC, but performs - * NFS_FILE_SYNC. We therefore implement this checking - * as a dprintk() in order to avoid filling syslog. - */ - static unsigned long complain; + if (task->tk_status >= 0) { + enum nfs3_stable_how committed = hdr->res.verf->committed; + + if (committed == NFS_UNSTABLE) { + /* + * We have some uncommitted data on the server at + * this point, so ensure that we keep track of that + * fact irrespective of what later writes do. + */ + set_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags); + } - /* Note this will print the MDS for a DS write */ - if (time_before(complain, jiffies)) { - dprintk("NFS: faulty NFS server %s:" - " (committed = %d) != (stable = %d)\n", - NFS_SERVER(inode)->nfs_client->cl_hostname, - hdr->res.verf->committed, hdr->args.stable); - complain = jiffies + 300 * HZ; + if (committed < hdr->args.stable) { + /* We tried a write call, but the server did not + * commit data to stable storage even though we + * requested it. + * Note: There is a known bug in Tru64 < 5.0 in which + * the server reports NFS_DATA_SYNC, but performs + * NFS_FILE_SYNC. We therefore implement this checking + * as a dprintk() in order to avoid filling syslog. + */ + static unsigned long complain; + + /* Note this will print the MDS for a DS write */ + if (time_before(complain, jiffies)) { + dprintk("NFS: faulty NFS server %s:" + " (committed = %d) != (stable = %d)\n", + NFS_SERVER(inode)->nfs_client->cl_hostname, + committed, hdr->args.stable); + complain = jiffies + 300 * HZ; + } } } @@ -1872,7 +1883,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) (long long)req_offset(req)); if (status < 0) { if (req->wb_page) { - trace_nfs_commit_error(req, status); + trace_nfs_commit_error(data->inode, req, + status); nfs_mapping_set_error(req->wb_page, status); nfs_inode_remove_request(req); } |