summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-09 14:33:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-09 14:33:15 -0700
commit06cbd26d312edfe4a83ff541c23f8f866265eb24 (patch)
tree45046d0daca202df6c590390df05fbd07ed84a5d
parentabde77eb5c66b2f98539c4644b54f34b7e179e6b (diff)
parent5940d1cf9f42f67e9cc3f7df9eda39f5888d6e9e (diff)
downloadlwn-06cbd26d312edfe4a83ff541c23f8f866265eb24.tar.gz
lwn-06cbd26d312edfe4a83ff541c23f8f866265eb24.zip
Merge tag 'nfs-for-5.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker: "Highlights include: Stable bugfixes: - Fall back to MDS if no deviceid is found rather than aborting # v4.11+ - NFS4: Fix v4.0 client state corruption when mount Features: - Much improved handling of soft mounts with NFS v4.0: - Reduce risk of false positive timeouts - Faster failover of reads and writes after a timeout - Added a "softerr" mount option to return ETIMEDOUT instead of EIO to the application after a timeout - Increase number of xprtrdma backchannel requests - Add additional xprtrdma tracepoints - Improved send completion batching for xprtrdma Other bugfixes and cleanups: - Return -EINVAL when NFS v4.2 is passed an invalid dedup mode - Reduce usage of GFP_ATOMIC pages in SUNRPC - Various minor NFS over RDMA cleanups and bugfixes - Use the correct container namespace for upcalls - Don't share superblocks between user namespaces - Various other container fixes - Make nfs_match_client() killable to prevent soft lockups - Don't mark all open state for recovery when handling recallable state revoked flag" * tag 'nfs-for-5.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (69 commits) SUNRPC: Rebalance a kref in auth_gss.c NFS: Fix a double unlock from nfs_match,get_client nfs: pass the correct prototype to read_cache_page NFSv4: don't mark all open state for recovery when handling recallable state revoked flag SUNRPC: Fix an error code in gss_alloc_msg() SUNRPC: task should be exit if encode return EKEYEXPIRED more times NFS4: Fix v4.0 client state corruption when mount PNFS fallback to MDS if no deviceid found NFS: make nfs_match_client killable lockd: Store the lockd client credential in struct nlm_host NFS: When mounting, don't share filesystems between different user namespaces NFS: Convert NFSv2 to use the container user namespace NFSv4: Convert the NFS client idmapper to use the container user namespace NFS: Convert NFSv3 to use the container user namespace SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall SUNRPC: Use the client user namespace when encoding creds NFS: Store the credential of the mount process in the nfs_server SUNRPC: Cache cred of process creating the rpc_client xprtrdma: Remove stale comment xprtrdma: Update comments that reference ib_drain_qp ...
-rw-r--r--fs/lockd/clntlock.c2
-rw-r--r--fs/lockd/clntproc.c4
-rw-r--r--fs/lockd/host.c10
-rw-r--r--fs/lockd/mon.c1
-rw-r--r--fs/nfs/client.c16
-rw-r--r--fs/nfs/delegation.c12
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c7
-rw-r--r--fs/nfs/direct.c11
-rw-r--r--fs/nfs/file.c31
-rw-r--r--fs/nfs/filelayout/filelayout.c6
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c14
-rw-r--r--fs/nfs/inode.c13
-rw-r--r--fs/nfs/internal.h8
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/nfs2xdr.c58
-rw-r--r--fs/nfs/nfs3client.c1
-rw-r--r--fs/nfs/nfs3xdr.c142
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4client.c6
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/nfs4idmap.c27
-rw-r--r--fs/nfs/nfs4proc.c159
-rw-r--r--fs/nfs/nfs4state.c7
-rw-r--r--fs/nfs/pagelist.c123
-rw-r--r--fs/nfs/pnfs.c4
-rw-r--r--fs/nfs/pnfs.h4
-rw-r--r--fs/nfs/read.c6
-rw-r--r--fs/nfs/super.c32
-rw-r--r--fs/nfs/symlink.c7
-rw-r--r--fs/nfs/write.c70
-rw-r--r--fs/nfsd/nfs4callback.c5
-rw-r--r--include/linux/lockd/bind.h1
-rw-r--r--include/linux/lockd/lockd.h4
-rw-r--r--include/linux/nfs_fs.h1
-rw-r--r--include/linux/nfs_fs_sb.h13
-rw-r--r--include/linux/nfs_page.h12
-rw-r--r--include/linux/sunrpc/clnt.h4
-rw-r--r--include/linux/sunrpc/sched.h20
-rw-r--r--include/linux/sunrpc/xprt.h6
-rw-r--r--include/trace/events/rpcrdma.h27
-rw-r--r--include/trace/events/sunrpc.h8
-rw-r--r--include/uapi/linux/nfs_mount.h9
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c71
-rw-r--r--net/sunrpc/auth_unix.c9
-rw-r--r--net/sunrpc/clnt.c132
-rw-r--r--net/sunrpc/debugfs.c2
-rw-r--r--net/sunrpc/rpcb_clnt.c12
-rw-r--r--net/sunrpc/sched.c158
-rw-r--r--net/sunrpc/socklib.c2
-rw-r--r--net/sunrpc/xprt.c154
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c120
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c63
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c115
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/transport.c105
-rw-r--r--net/sunrpc/xprtrdma/verbs.c338
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h121
-rw-r--r--net/sunrpc/xprtsock.c9
59 files changed, 1366 insertions, 946 deletions
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index c2a128678e6e..70f520b41a19 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -63,7 +63,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
nlm_init->protocol, nlm_version,
nlm_init->hostname, nlm_init->noresvport,
- nlm_init->net);
+ nlm_init->net, nlm_init->cred);
if (host == NULL)
goto out_nohost;
if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index e8a004097d18..d9c32d1a20c0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -715,7 +715,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
struct nlm_rqst *req = data;
u32 status = ntohl(req->a_res.status);
- if (RPC_ASSASSINATED(task))
+ if (RPC_SIGNALLED(task))
goto die;
if (task->tk_status < 0) {
@@ -783,7 +783,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
struct nlm_rqst *req = data;
u32 status = ntohl(req->a_res.status);
- if (RPC_ASSASSINATED(task))
+ if (RPC_SIGNALLED(task))
goto die;
if (task->tk_status < 0) {
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f0b5c987d6ae..7d46fafdbbe5 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -60,6 +60,7 @@ struct nlm_lookup_host_info {
const size_t hostname_len; /* it's length */
const int noresvport; /* use non-priv port */
struct net *net; /* network namespace to bind */
+ const struct cred *cred;
};
/*
@@ -162,6 +163,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
host->h_nsmhandle = nsm;
host->h_addrbuf = nsm->sm_addrbuf;
host->net = ni->net;
+ host->h_cred = get_cred(ni->cred),
strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
out:
@@ -188,6 +190,7 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
clnt = host->h_rpcclnt;
if (clnt != NULL)
rpc_shutdown_client(clnt);
+ put_cred(host->h_cred);
kfree(host);
ln->nrhosts--;
@@ -202,6 +205,8 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
* @version: NLM protocol version
* @hostname: '\0'-terminated hostname of server
* @noresvport: 1 if non-privileged port should be used
+ * @net: pointer to net namespace
+ * @cred: pointer to cred
*
* Returns an nlm_host structure that matches the passed-in
* [server address, transport protocol, NLM version, server hostname].
@@ -214,7 +219,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
const u32 version,
const char *hostname,
int noresvport,
- struct net *net)
+ struct net *net,
+ const struct cred *cred)
{
struct nlm_lookup_host_info ni = {
.server = 0,
@@ -226,6 +232,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
.hostname_len = strlen(hostname),
.noresvport = noresvport,
.net = net,
+ .cred = cred,
};
struct hlist_head *chain;
struct nlm_host *host;
@@ -458,6 +465,7 @@ nlm_bind_host(struct nlm_host *host)
.authflavor = RPC_AUTH_UNIX,
.flags = (RPC_CLNT_CREATE_NOPING |
RPC_CLNT_CREATE_AUTOBIND),
+ .cred = host->h_cred,
};
/*
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 654594ef4f94..1eabd91870e6 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -82,6 +82,7 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename)
.version = NSM_VERSION,
.authflavor = RPC_AUTH_NULL,
.flags = RPC_CLNT_CREATE_NOPING,
+ .cred = current_cred(),
};
return rpc_create(&args);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 90d71fda65ce..da74c4c4a244 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -284,6 +284,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
struct nfs_client *clp;
const struct sockaddr *sap = data->addr;
struct nfs_net *nn = net_generic(data->net, nfs_net_id);
+ int error;
again:
list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
@@ -296,9 +297,11 @@ again:
if (clp->cl_cons_state > NFS_CS_READY) {
refcount_inc(&clp->cl_count);
spin_unlock(&nn->nfs_client_lock);
- nfs_wait_client_init_complete(clp);
+ error = nfs_wait_client_init_complete(clp);
nfs_put_client(clp);
spin_lock(&nn->nfs_client_lock);
+ if (error < 0)
+ return ERR_PTR(error);
goto again;
}
@@ -407,6 +410,8 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
clp = nfs_match_client(cl_init);
if (clp) {
spin_unlock(&nn->nfs_client_lock);
+ if (IS_ERR(clp))
+ return clp;
if (new)
new->rpc_ops->free_client(new);
return nfs_found_client(cl_init, clp);
@@ -500,6 +505,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
.program = &nfs_program,
.version = clp->rpc_ops->version,
.authflavor = flavor,
+ .cred = cl_init->cred,
};
if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
@@ -598,6 +604,8 @@ int nfs_init_server_rpcclient(struct nfs_server *server,
sizeof(server->client->cl_timeout_default));
server->client->cl_timeout = &server->client->cl_timeout_default;
server->client->cl_softrtry = 0;
+ if (server->flags & NFS_MOUNT_SOFTERR)
+ server->client->cl_softerr = 1;
if (server->flags & NFS_MOUNT_SOFT)
server->client->cl_softrtry = 1;
@@ -652,6 +660,7 @@ static int nfs_init_server(struct nfs_server *server,
.proto = data->nfs_server.protocol,
.net = data->net,
.timeparms = &timeparms,
+ .cred = server->cred,
};
struct nfs_client *clp;
int error;
@@ -920,6 +929,7 @@ void nfs_free_server(struct nfs_server *server)
ida_destroy(&server->lockowner_id);
ida_destroy(&server->openowner_id);
nfs_free_iostats(server->io_stats);
+ put_cred(server->cred);
kfree(server);
nfs_release_automount_timer();
}
@@ -940,6 +950,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
if (!server)
return ERR_PTR(-ENOMEM);
+ server->cred = get_cred(current_cred());
+
error = -ENOMEM;
fattr = nfs_alloc_fattr();
if (fattr == NULL)
@@ -1006,6 +1018,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
if (!server)
return ERR_PTR(-ENOMEM);
+ server->cred = get_cred(source->cred);
+
error = -ENOMEM;
fattr_fsinfo = nfs_alloc_fattr();
if (fattr_fsinfo == NULL)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 2f6b447cdd82..8b78274e3e56 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -1034,6 +1034,18 @@ void nfs_mark_test_expired_all_delegations(struct nfs_client *clp)
}
/**
+ * nfs_test_expired_all_delegations - test all delegations for a client
+ * @clp: nfs_client to process
+ *
+ * Helper for handling "recallable state revoked" status from server.
+ */
+void nfs_test_expired_all_delegations(struct nfs_client *clp)
+{
+ nfs_mark_test_expired_all_delegations(clp);
+ nfs4_schedule_state_manager(clp);
+}
+
+/**
* nfs_reap_expired_delegations - reap expired delegations
* @clp: nfs_client to process
*
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 35b4b02c1ae0..5799777df5ec 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -58,6 +58,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
void nfs_mark_test_expired_all_delegations(struct nfs_client *clp);
+void nfs_test_expired_all_delegations(struct nfs_client *clp);
void nfs_reap_expired_delegations(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a71d0b42d160..47d445bec8c9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -714,8 +714,9 @@ out:
* We only need to convert from xdr once so future lookups are much simpler
*/
static
-int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
+int nfs_readdir_filler(void *data, struct page* page)
{
+ nfs_readdir_descriptor_t *desc = data;
struct inode *inode = file_inode(desc->file);
int ret;
@@ -762,8 +763,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
static
struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
{
- return read_cache_page(desc->file->f_mapping,
- desc->page_index, (filler_t *)nfs_readdir_filler, desc);
+ return read_cache_page(desc->file->f_mapping, desc->page_index,
+ nfs_readdir_filler, desc);
}
/*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0fd811ac08b5..2436bd92bc00 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -492,7 +492,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
/* XXX do we need to do the eof zeroing found in async_filler? */
- req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+ req = nfs_create_request(dreq->ctx, pagevec[i],
pgbase, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
@@ -663,6 +663,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
}
list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+ /* Bump the transmission count */
+ req->wb_nio++;
if (!nfs_pageio_add_request(&desc, req)) {
nfs_list_move_request(req, &failed);
spin_lock(&cinfo.inode->i_lock);
@@ -703,6 +705,11 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+ /*
+ * Despite the reboot, the write was successful,
+ * so reset wb_nio.
+ */
+ req->wb_nio = 0;
/* Note the rewrite will go through mds */
nfs_mark_request_commit(req, NULL, &cinfo, 0);
} else
@@ -899,7 +906,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
- req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+ req = nfs_create_request(dreq->ctx, pagevec[i],
pgbase, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4899b85f9b3c..144e183250c3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -147,7 +147,7 @@ nfs_file_flush(struct file *file, fl_owner_t id)
return 0;
/* Flush writes to the server and return any errors */
- return vfs_fsync(file, 0);
+ return nfs_wb_all(inode);
}
ssize_t
@@ -199,13 +199,6 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
* Flush any dirty pages for this process, and check for write errors.
* The return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
- *
- * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
- * disk, but it retrieves and clears ctx->error after synching, despite
- * the two being set at the same time in nfs_context_set_write_error().
- * This is because the former is used to notify the _next_ call to
- * nfs_file_write() that a write error occurred, and hence cause it to
- * fall back to doing a synchronous write.
*/
static int
nfs_file_fsync_commit(struct file *file, int datasync)
@@ -220,11 +213,8 @@ nfs_file_fsync_commit(struct file *file, int datasync)
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
status = nfs_commit_inode(inode, FLUSH_SYNC);
- if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) {
- ret = xchg(&ctx->error, 0);
- if (ret)
- goto out;
- }
+ if (status == 0)
+ status = file_check_and_advance_wb_err(file);
if (status < 0) {
ret = status;
goto out;
@@ -245,13 +235,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
trace_nfs_fsync_enter(inode);
do {
- struct nfs_open_context *ctx = nfs_file_open_context(file);
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) {
- int ret2 = xchg(&ctx->error, 0);
- if (ret2)
- ret = ret2;
- }
+ ret = file_write_and_wait_range(file, start, end);
if (ret != 0)
break;
ret = nfs_file_fsync_commit(file, datasync);
@@ -600,8 +584,7 @@ static int nfs_need_check_write(struct file *filp, struct inode *inode)
struct nfs_open_context *ctx;
ctx = nfs_file_open_context(filp);
- if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
- nfs_ctx_key_to_expire(ctx, inode))
+ if (nfs_ctx_key_to_expire(ctx, inode))
return 1;
return 0;
}
@@ -655,7 +638,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
/* Return error values */
if (nfs_need_check_write(file, inode)) {
- int err = vfs_fsync(file, 0);
+ int err = nfs_wb_all(inode);
if (err < 0)
result = err;
}
@@ -709,7 +692,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* Flush all pending writes before doing anything
* with locks..
*/
- vfs_fsync(filp, 0);
+ nfs_wb_all(inode);
l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
if (!IS_ERR(l_ctx)) {
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 61f46facb39c..3cb073c50fa6 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -904,7 +904,7 @@ fl_pnfs_update_layout(struct inode *ino,
status = filelayout_check_deviceid(lo, fl, gfp_flags);
if (status) {
pnfs_put_lseg(lseg);
- lseg = ERR_PTR(status);
+ lseg = NULL;
}
out:
return lseg;
@@ -917,7 +917,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_READ,
@@ -944,7 +944,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_RW,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 6673d4ff5a2a..9920c52bd0cd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -28,6 +28,8 @@
#define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
#define FF_LAYOUTRETURN_MAXERR 20
+static unsigned short io_maxretrans;
+
static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr);
static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
@@ -871,7 +873,7 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
{
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_READ,
@@ -925,6 +927,7 @@ retry:
pgm = &pgio->pg_mirrors[0];
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
+ pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
if (pgio->pg_error < 0)
@@ -950,7 +953,7 @@ retry:
pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_RW,
@@ -992,6 +995,7 @@ retry:
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
}
+ pgio->pg_maxretrans = io_maxretrans;
return;
out_mds:
@@ -1006,7 +1010,7 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
{
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
0,
NFS4_MAX_UINT64,
IOMODE_RW,
@@ -2515,3 +2519,7 @@ MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");
module_init(nfs4flexfilelayout_init);
module_exit(nfs4flexfilelayout_exit);
+
+module_param(io_maxretrans, ushort, 0644);
+MODULE_PARM_DESC(io_maxretrans, "The number of times the NFSv4.1 client "
+ "retries an I/O request before returning an error. ");
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f61af8307dc8..3bc2550cfe4e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -885,10 +885,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
spin_lock(&inode->i_lock);
res = __nfs_find_lock_context(ctx);
if (res == NULL) {
- list_add_tail_rcu(&new->list, &ctx->lock_context.list);
- new->open_context = ctx;
- res = new;
- new = NULL;
+ new->open_context = get_nfs_open_context(ctx);
+ if (new->open_context) {
+ list_add_tail_rcu(&new->list,
+ &ctx->lock_context.list);
+ res = new;
+ new = NULL;
+ } else
+ res = ERR_PTR(-EBADF);
}
spin_unlock(&inode->i_lock);
kfree(new);
@@ -906,6 +910,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
return;
list_del_rcu(&l_ctx->list);
spin_unlock(&inode->i_lock);
+ put_nfs_open_context(ctx);
kfree_rcu(l_ctx, rcu_head);
}
EXPORT_SYMBOL_GPL(nfs_put_lock_context);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 331a0504eaf8..498fab72f70b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -84,6 +84,7 @@ struct nfs_client_initdata {
u32 minorversion;
struct net *net;
const struct rpc_timeout *timeparms;
+ const struct cred *cred;
};
/*
@@ -766,15 +767,10 @@ static inline bool nfs_error_is_fatal(int err)
case -ESTALE:
case -E2BIG:
case -ENOMEM:
+ case -ETIMEDOUT:
return true;
default:
return false;
}
}
-static inline void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
-{
- ctx->error = error;
- smp_wmb();
- set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
-}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d979ff4fee7e..cb7c10e9721e 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -163,6 +163,7 @@ int nfs_mount(struct nfs_mount_request *info)
.program = &mnt_program,
.version = info->version,
.authflavor = RPC_AUTH_UNIX,
+ .cred = current_cred(),
};
struct rpc_clnt *mnt_clnt;
int status;
@@ -249,6 +250,7 @@ void nfs_umount(const struct nfs_mount_request *info)
.version = info->version,
.authflavor = RPC_AUTH_UNIX,
.flags = RPC_CLNT_CREATE_NOPING,
+ .cred = current_cred(),
};
struct rpc_message msg = {
.rpc_argp = info->dirpath,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a7ed29de0a40..572794dab4b1 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -76,6 +76,20 @@ static int nfs_stat_to_errno(enum nfs_stat);
* or decoded inline.
*/
+static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt)
+{
+ if (clnt && clnt->cl_cred)
+ return clnt->cl_cred->user_ns;
+ return &init_user_ns;
+}
+
+static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp)
+{
+ if (rqstp->rq_task)
+ return rpc_userns(rqstp->rq_task->tk_client);
+ return &init_user_ns;
+}
+
/*
* typedef opaque nfsdata<>;
*/
@@ -248,7 +262,8 @@ static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
* };
*
*/
-static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+ struct user_namespace *userns)
{
u32 rdev, type;
__be32 *p;
@@ -263,10 +278,10 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->mode = be32_to_cpup(p++);
fattr->nlink = be32_to_cpup(p++);
- fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+ fattr->uid = make_kuid(userns, be32_to_cpup(p++));
if (!uid_valid(fattr->uid))
goto out_uid;
- fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+ fattr->gid = make_kgid(userns, be32_to_cpup(p++));
if (!gid_valid(fattr->gid))
goto out_gid;
@@ -321,7 +336,8 @@ static __be32 *xdr_time_not_set(__be32 *p)
return p;
}
-static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
+static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
+ struct user_namespace *userns)
{
struct timespec ts;
__be32 *p;
@@ -333,11 +349,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_UID)
- *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
+ *p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_GID)
- *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
+ *p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_SIZE)
@@ -451,7 +467,8 @@ out_cheating:
* };
*/
static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
- __u32 *op_status)
+ __u32 *op_status,
+ struct user_namespace *userns)
{
enum nfs_stat status;
int error;
@@ -463,7 +480,7 @@ static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
*op_status = status;
if (status != NFS_OK)
goto out_default;
- error = decode_fattr(xdr, result);
+ error = decode_fattr(xdr, result, userns);
out:
return error;
out_default:
@@ -498,19 +515,21 @@ static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh,
* void;
* };
*/
-static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result)
+static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result,
+ struct user_namespace *userns)
{
int error;
error = decode_fhandle(xdr, result->fh);
if (unlikely(error))
goto out;
- error = decode_fattr(xdr, result->fattr);
+ error = decode_fattr(xdr, result->fattr, userns);
out:
return error;
}
-static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
+static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result,
+ struct user_namespace *userns)
{
enum nfs_stat status;
int error;
@@ -520,7 +539,7 @@ static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
goto out;
if (status != NFS_OK)
goto out_default;
- error = decode_diropok(xdr, result);
+ error = decode_diropok(xdr, result, userns);
out:
return error;
out_default:
@@ -559,7 +578,7 @@ static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
const struct nfs_sattrargs *args = data;
encode_fhandle(xdr, args->fh);
- encode_sattr(xdr, args->sattr);
+ encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
}
static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
@@ -674,7 +693,7 @@ static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
const struct nfs_createargs *args = data;
encode_diropargs(xdr, args->fh, args->name, args->len);
- encode_sattr(xdr, args->sattr);
+ encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
}
static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
@@ -741,7 +760,7 @@ static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
encode_path(xdr, args->pages, args->pathlen);
- encode_sattr(xdr, args->sattr);
+ encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
}
/*
@@ -803,13 +822,13 @@ out_default:
static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
void *result)
{
- return decode_attrstat(xdr, result, NULL);
+ return decode_attrstat(xdr, result, NULL, rpc_rqst_userns(req));
}
static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
void *result)
{
- return decode_diropres(xdr, result);
+ return decode_diropres(xdr, result, rpc_rqst_userns(req));
}
/*
@@ -864,7 +883,7 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
result->op_status = status;
if (status != NFS_OK)
goto out_default;
- error = decode_fattr(xdr, result->fattr);
+ error = decode_fattr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
error = decode_nfsdata(xdr, result);
@@ -881,7 +900,8 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
/* All NFSv2 writes are "file sync" writes */
result->verf->committed = NFS_FILE_SYNC;
- return decode_attrstat(xdr, result->fattr, &result->op_status);
+ return decode_attrstat(xdr, result->fattr, &result->op_status,
+ rpc_rqst_userns(req));
}
/**
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 7879f2a0fcfd..1afdb0f7473f 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -91,6 +91,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
.proto = ds_proto,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
+ .cred = mds_srv->cred,
};
struct nfs_client *clp;
char buf[INET6_ADDRSTRLEN + 1];
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 110358f4986d..abbbdde97e31 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -104,6 +104,20 @@ static const umode_t nfs_type2fmt[] = {
[NF3FIFO] = S_IFIFO,
};
+static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt)
+{
+ if (clnt && clnt->cl_cred)
+ return clnt->cl_cred->user_ns;
+ return &init_user_ns;
+}
+
+static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp)
+{
+ if (rqstp->rq_task)
+ return rpc_userns(rqstp->rq_task->tk_client);
+ return &init_user_ns;
+}
+
/*
* Encode/decode NFSv3 basic data types
*
@@ -516,7 +530,8 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
* set_mtime mtime;
* };
*/
-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
+static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
+ struct user_namespace *userns)
{
struct timespec ts;
u32 nbytes;
@@ -551,13 +566,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
if (attr->ia_valid & ATTR_UID) {
*p++ = xdr_one;
- *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
+ *p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
} else
*p++ = xdr_zero;
if (attr->ia_valid & ATTR_GID) {
*p++ = xdr_one;
- *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
+ *p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
} else
*p++ = xdr_zero;
@@ -606,7 +621,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
* nfstime3 ctime;
* };
*/
-static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+ struct user_namespace *userns)
{
umode_t fmode;
__be32 *p;
@@ -619,10 +635,10 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
fattr->nlink = be32_to_cpup(p++);
- fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+ fattr->uid = make_kuid(userns, be32_to_cpup(p++));
if (!uid_valid(fattr->uid))
goto out_uid;
- fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+ fattr->gid = make_kgid(userns, be32_to_cpup(p++));
if (!gid_valid(fattr->gid))
goto out_gid;
@@ -659,7 +675,8 @@ out_gid:
* void;
* };
*/
-static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+ struct user_namespace *userns)
{
__be32 *p;
@@ -667,7 +684,7 @@ static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
if (unlikely(!p))
return -EIO;
if (*p != xdr_zero)
- return decode_fattr3(xdr, fattr);
+ return decode_fattr3(xdr, fattr, userns);
return 0;
}
@@ -728,14 +745,15 @@ static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
return 0;
}
-static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+ struct user_namespace *userns)
{
int error;
error = decode_pre_op_attr(xdr, fattr);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, fattr);
+ error = decode_post_op_attr(xdr, fattr, userns);
out:
return error;
}
@@ -837,7 +855,7 @@ static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
{
const struct nfs3_sattrargs *args = data;
encode_nfs_fh3(xdr, args->fh);
- encode_sattr3(xdr, args->sattr);
+ encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
encode_sattrguard3(xdr, args);
}
@@ -998,13 +1016,14 @@ static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
* };
*/
static void encode_createhow3(struct xdr_stream *xdr,
- const struct nfs3_createargs *args)
+ const struct nfs3_createargs *args,
+ struct user_namespace *userns)
{
encode_uint32(xdr, args->createmode);
switch (args->createmode) {
case NFS3_CREATE_UNCHECKED:
case NFS3_CREATE_GUARDED:
- encode_sattr3(xdr, args->sattr);
+ encode_sattr3(xdr, args->sattr, userns);
break;
case NFS3_CREATE_EXCLUSIVE:
encode_createverf3(xdr, args->verifier);
@@ -1021,7 +1040,7 @@ static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
const struct nfs3_createargs *args = data;
encode_diropargs3(xdr, args->fh, args->name, args->len);
- encode_createhow3(xdr, args);
+ encode_createhow3(xdr, args, rpc_rqst_userns(req));
}
/*
@@ -1039,7 +1058,7 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
const struct nfs3_mkdirargs *args = data;
encode_diropargs3(xdr, args->fh, args->name, args->len);
- encode_sattr3(xdr, args->sattr);
+ encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
}
/*
@@ -1056,11 +1075,12 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
* };
*/
static void encode_symlinkdata3(struct xdr_stream *xdr,
- const void *data)
+ const void *data,
+ struct user_namespace *userns)
{
const struct nfs3_symlinkargs *args = data;
- encode_sattr3(xdr, args->sattr);
+ encode_sattr3(xdr, args->sattr, userns);
encode_nfspath3(xdr, args->pages, args->pathlen);
}
@@ -1071,7 +1091,7 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
const struct nfs3_symlinkargs *args = data;
encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
- encode_symlinkdata3(xdr, args);
+ encode_symlinkdata3(xdr, args, rpc_rqst_userns(req));
xdr->buf->flags |= XDRBUF_WRITE;
}
@@ -1100,24 +1120,26 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
* };
*/
static void encode_devicedata3(struct xdr_stream *xdr,
- const struct nfs3_mknodargs *args)
+ const struct nfs3_mknodargs *args,
+ struct user_namespace *userns)
{
- encode_sattr3(xdr, args->sattr);
+ encode_sattr3(xdr, args->sattr, userns);
encode_specdata3(xdr, args->rdev);
}
static void encode_mknoddata3(struct xdr_stream *xdr,
- const struct nfs3_mknodargs *args)
+ const struct nfs3_mknodargs *args,
+ struct user_namespace *userns)
{
encode_ftype3(xdr, args->type);
switch (args->type) {
case NF3CHR:
case NF3BLK:
- encode_devicedata3(xdr, args);
+ encode_devicedata3(xdr, args, userns);
break;
case NF3SOCK:
case NF3FIFO:
- encode_sattr3(xdr, args->sattr);
+ encode_sattr3(xdr, args->sattr, userns);
break;
case NF3REG:
case NF3DIR:
@@ -1134,7 +1156,7 @@ static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
const struct nfs3_mknodargs *args = data;
encode_diropargs3(xdr, args->fh, args->name, args->len);
- encode_mknoddata3(xdr, args);
+ encode_mknoddata3(xdr, args, rpc_rqst_userns(req));
}
/*
@@ -1379,7 +1401,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
- error = decode_fattr3(xdr, result);
+ error = decode_fattr3(xdr, result, rpc_rqst_userns(req));
out:
return error;
out_default:
@@ -1414,7 +1436,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_wcc_data(xdr, result);
+ error = decode_wcc_data(xdr, result, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@@ -1449,6 +1471,7 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
struct xdr_stream *xdr,
void *data)
{
+ struct user_namespace *userns = rpc_rqst_userns(req);
struct nfs3_diropres *result = data;
enum nfs_stat status;
int error;
@@ -1461,14 +1484,14 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
error = decode_nfs_fh3(xdr, result->fh);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result->fattr);
+ error = decode_post_op_attr(xdr, result->fattr, userns);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result->dir_attr);
+ error = decode_post_op_attr(xdr, result->dir_attr, userns);
out:
return error;
out_default:
- error = decode_post_op_attr(xdr, result->dir_attr);
+ error = decode_post_op_attr(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
return nfs3_stat_to_errno(status);
@@ -1504,7 +1527,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result->fattr);
+ error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@@ -1545,7 +1568,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result);
+ error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@@ -1623,7 +1646,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result->fattr);
+ error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
result->op_status = status;
@@ -1694,7 +1717,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_wcc_data(xdr, result->fattr);
+ error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
result->op_status = status;
@@ -1728,14 +1751,15 @@ out_status:
* };
*/
static int decode_create3resok(struct xdr_stream *xdr,
- struct nfs3_diropres *result)
+ struct nfs3_diropres *result,
+ struct user_namespace *userns)
{
int error;
error = decode_post_op_fh3(xdr, result->fh);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result->fattr);
+ error = decode_post_op_attr(xdr, result->fattr, userns);
if (unlikely(error))
goto out;
/* The server isn't required to return a file handle.
@@ -1744,7 +1768,7 @@ static int decode_create3resok(struct xdr_stream *xdr,
* values for the new object. */
if (result->fh->size == 0)
result->fattr->valid = 0;
- error = decode_wcc_data(xdr, result->dir_attr);
+ error = decode_wcc_data(xdr, result->dir_attr, userns);
out:
return error;
}
@@ -1753,6 +1777,7 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
struct xdr_stream *xdr,
void *data)
{
+ struct user_namespace *userns = rpc_rqst_userns(req);
struct nfs3_diropres *result = data;
enum nfs_stat status;
int error;
@@ -1762,11 +1787,11 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
- error = decode_create3resok(xdr, result);
+ error = decode_create3resok(xdr, result, userns);
out:
return error;
out_default:
- error = decode_wcc_data(xdr, result->dir_attr);
+ error = decode_wcc_data(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
return nfs3_stat_to_errno(status);
@@ -1801,7 +1826,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_wcc_data(xdr, result->dir_attr);
+ error = decode_wcc_data(xdr, result->dir_attr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@@ -1836,6 +1861,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
struct xdr_stream *xdr,
void *data)
{
+ struct user_namespace *userns = rpc_rqst_userns(req);
struct nfs_renameres *result = data;
enum nfs_stat status;
int error;
@@ -1843,10 +1869,10 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_wcc_data(xdr, result->old_fattr);
+ error = decode_wcc_data(xdr, result->old_fattr, userns);
if (unlikely(error))
goto out;
- error = decode_wcc_data(xdr, result->new_fattr);
+ error = decode_wcc_data(xdr, result->new_fattr, userns);
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@@ -1880,6 +1906,7 @@ out_status:
static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
void *data)
{
+ struct user_namespace *userns = rpc_rqst_userns(req);
struct nfs3_linkres *result = data;
enum nfs_stat status;
int error;
@@ -1887,10 +1914,10 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result->fattr);
+ error = decode_post_op_attr(xdr, result->fattr, userns);
if (unlikely(error))
goto out;
- error = decode_wcc_data(xdr, result->dir_attr);
+ error = decode_wcc_data(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@@ -1939,6 +1966,7 @@ out_status:
int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
bool plus)
{
+ struct user_namespace *userns = rpc_userns(entry->server->client);
struct nfs_entry old = *entry;
__be32 *p;
int error;
@@ -1973,7 +2001,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (plus) {
entry->fattr->valid = 0;
- error = decode_post_op_attr(xdr, entry->fattr);
+ error = decode_post_op_attr(xdr, entry->fattr, userns);
if (unlikely(error))
return error;
if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
@@ -2045,11 +2073,12 @@ static int decode_dirlist3(struct xdr_stream *xdr)
}
static int decode_readdir3resok(struct xdr_stream *xdr,
- struct nfs3_readdirres *result)
+ struct nfs3_readdirres *result,
+ struct user_namespace *userns)
{
int error;
- error = decode_post_op_attr(xdr, result->dir_attr);
+ error = decode_post_op_attr(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
/* XXX: do we need to check if result->verf != NULL ? */
@@ -2074,11 +2103,11 @@ static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
- error = decode_readdir3resok(xdr, result);
+ error = decode_readdir3resok(xdr, result, rpc_rqst_userns(req));
out:
return error;
out_default:
- error = decode_post_op_attr(xdr, result->dir_attr);
+ error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
return nfs3_stat_to_errno(status);
@@ -2138,7 +2167,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result->fattr);
+ error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@@ -2212,7 +2241,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result->fattr);
+ error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@@ -2273,7 +2302,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_post_op_attr(xdr, result->fattr);
+ error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
if (status != NFS3_OK)
@@ -2315,7 +2344,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
- error = decode_wcc_data(xdr, result->fattr);
+ error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
result->op_status = status;
@@ -2331,14 +2360,15 @@ out_status:
#ifdef CONFIG_NFS_V3_ACL
static inline int decode_getacl3resok(struct xdr_stream *xdr,
- struct nfs3_getaclres *result)
+ struct nfs3_getaclres *result,
+ struct user_namespace *userns)
{
struct posix_acl **acl;
unsigned int *aclcnt;
size_t hdrlen;
int error;
- error = decode_post_op_attr(xdr, result->fattr);
+ error = decode_post_op_attr(xdr, result->fattr, userns);
if (unlikely(error))
goto out;
error = decode_uint32(xdr, &result->mask);
@@ -2386,7 +2416,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
- error = decode_getacl3resok(xdr, result);
+ error = decode_getacl3resok(xdr, result, rpc_rqst_userns(req));
out:
return error;
out_default:
@@ -2405,7 +2435,7 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_default;
- error = decode_post_op_attr(xdr, result);
+ error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
out:
return error;
out_default:
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 06ac3d9ac7c6..8a38a254f516 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -206,6 +206,7 @@ struct nfs4_exception {
unsigned char delay : 1,
recovering : 1,
retry : 1;
+ bool interruptible;
};
struct nfs4_state_recovery_ops {
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 1339ede979af..3ce246346f02 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -870,6 +870,7 @@ static int nfs4_set_client(struct nfs_server *server,
.minorversion = minorversion,
.net = net,
.timeparms = timeparms,
+ .cred = server->cred,
};
struct nfs_client *clp;
@@ -931,6 +932,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
.minorversion = minor_version,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
+ .cred = mds_srv->cred,
};
char buf[INET6_ADDRSTRLEN + 1];
@@ -1107,6 +1109,8 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
if (!server)
return ERR_PTR(-ENOMEM);
+ server->cred = get_cred(current_cred());
+
auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
/* set up the general RPC client */
@@ -1143,6 +1147,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
parent_server = NFS_SB(data->sb);
parent_client = parent_server->nfs_client;
+ server->cred = get_cred(parent_server->cred);
+
/* Initialise the client representation from the parent server */
nfs_server_copy_userdata(server, parent_server);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 00d17198ee12..cf42a8b939e3 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -125,7 +125,7 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
return filemap_fdatawrite(file->f_mapping);
/* Flush writes to the server and return any errors */
- return vfs_fsync(file, 0);
+ return nfs_wb_all(inode);
}
#ifdef CONFIG_NFS_V4_2
@@ -187,7 +187,7 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
bool same_inode = false;
int ret;
- if (remap_flags & ~REMAP_FILE_ADVISORY)
+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
return -EINVAL;
/* check alignment w.r.t. clone_blksize */
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index bf34ddaa2ad7..4884fdae28fb 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -69,8 +69,16 @@ struct idmap {
struct rpc_pipe *idmap_pipe;
struct idmap_legacy_upcalldata *idmap_upcall_data;
struct mutex idmap_mutex;
+ const struct cred *cred;
};
+static struct user_namespace *idmap_userns(const struct idmap *idmap)
+{
+ if (idmap && idmap->cred)
+ return idmap->cred->user_ns;
+ return &init_user_ns;
+}
+
/**
* nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
* @fattr: fully initialised struct nfs_fattr
@@ -271,14 +279,15 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
const char *type, struct idmap *idmap)
{
char *desc;
- struct key *rkey;
+ struct key *rkey = ERR_PTR(-EAGAIN);
ssize_t ret;
ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
if (ret < 0)
return ERR_PTR(ret);
- rkey = request_key(&key_type_id_resolver, desc, "");
+ if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
+ rkey = request_key(&key_type_id_resolver, desc, "");
if (IS_ERR(rkey)) {
mutex_lock(&idmap->idmap_mutex);
rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
@@ -452,6 +461,9 @@ nfs_idmap_new(struct nfs_client *clp)
if (idmap == NULL)
return -ENOMEM;
+ mutex_init(&idmap->idmap_mutex);
+ idmap->cred = get_cred(clp->cl_rpcclient->cl_cred);
+
rpc_init_pipe_dir_object(&idmap->idmap_pdo,
&nfs_idmap_pipe_dir_object_ops,
idmap);
@@ -462,7 +474,6 @@ nfs_idmap_new(struct nfs_client *clp)
goto err;
}
idmap->idmap_pipe = pipe;
- mutex_init(&idmap->idmap_mutex);
error = rpc_add_pipe_dir_object(clp->cl_net,
&clp->cl_rpcclient->cl_pipedir_objects,
@@ -475,6 +486,7 @@ nfs_idmap_new(struct nfs_client *clp)
err_destroy_pipe:
rpc_destroy_pipe_data(idmap->idmap_pipe);
err:
+ put_cred(idmap->cred);
kfree(idmap);
return error;
}
@@ -491,6 +503,7 @@ nfs_idmap_delete(struct nfs_client *clp)
&clp->cl_rpcclient->cl_pipedir_objects,
&idmap->idmap_pdo);
rpc_destroy_pipe_data(idmap->idmap_pipe);
+ put_cred(idmap->cred);
kfree(idmap);
}
@@ -735,7 +748,7 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
if (!nfs_map_string_to_numeric(name, namelen, &id))
ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
if (ret == 0) {
- *uid = make_kuid(&init_user_ns, id);
+ *uid = make_kuid(idmap_userns(idmap), id);
if (!uid_valid(*uid))
ret = -ERANGE;
}
@@ -752,7 +765,7 @@ int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size
if (!nfs_map_string_to_numeric(name, namelen, &id))
ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
if (ret == 0) {
- *gid = make_kgid(&init_user_ns, id);
+ *gid = make_kgid(idmap_userns(idmap), id);
if (!gid_valid(*gid))
ret = -ERANGE;
}
@@ -766,7 +779,7 @@ int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf,
int ret = -EINVAL;
__u32 id;
- id = from_kuid(&init_user_ns, uid);
+ id = from_kuid_munged(idmap_userns(idmap), uid);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
if (ret < 0)
@@ -780,7 +793,7 @@ int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf,
int ret = -EINVAL;
__u32 id;
- id = from_kgid(&init_user_ns, gid);
+ id = from_kgid_munged(idmap_userns(idmap), gid);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
if (ret < 0)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 741ff8c9c6ed..c29cbef6b53f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -400,17 +400,32 @@ static long nfs4_update_delay(long *timeout)
return ret;
}
-static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
+static int nfs4_delay_killable(long *timeout)
{
- int res = 0;
-
might_sleep();
freezable_schedule_timeout_killable_unsafe(
nfs4_update_delay(timeout));
- if (fatal_signal_pending(current))
- res = -ERESTARTSYS;
- return res;
+ if (!__fatal_signal_pending(current))
+ return 0;
+ return -EINTR;
+}
+
+static int nfs4_delay_interruptible(long *timeout)
+{
+ might_sleep();
+
+ freezable_schedule_timeout_interruptible(nfs4_update_delay(timeout));
+ if (!signal_pending(current))
+ return 0;
+ return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS;
+}
+
+static int nfs4_delay(long *timeout, bool interruptible)
+{
+ if (interruptible)
+ return nfs4_delay_interruptible(timeout);
+ return nfs4_delay_killable(timeout);
}
/* This is the error handling routine for processes that are allowed
@@ -546,7 +561,8 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
- ret = nfs4_delay(server->client, &exception->timeout);
+ ret = nfs4_delay(&exception->timeout,
+ exception->interruptible);
goto out_retry;
}
if (exception->recovering) {
@@ -978,10 +994,8 @@ int nfs4_setup_sequence(struct nfs_client *client,
if (res->sr_slot != NULL)
goto out_start;
- if (session) {
+ if (session)
tbl = &session->fc_slot_table;
- task->tk_timeout = 0;
- }
spin_lock(&tbl->slot_tbl_lock);
/* The state manager will wait until the slot table is empty */
@@ -990,9 +1004,8 @@ int nfs4_setup_sequence(struct nfs_client *client,
slot = nfs4_alloc_slot(tbl);
if (IS_ERR(slot)) {
- /* Try again in 1/4 second */
if (slot == ERR_PTR(-ENOMEM))
- task->tk_timeout = HZ >> 2;
+ goto out_sleep_timeout;
goto out_sleep;
}
spin_unlock(&tbl->slot_tbl_lock);
@@ -1004,11 +1017,20 @@ out_start:
nfs41_sequence_res_init(res);
rpc_call_start(task);
return 0;
-
+out_sleep_timeout:
+ /* Try again in 1/4 second */
+ if (args->sa_privileged)
+ rpc_sleep_on_priority_timeout(&tbl->slot_tbl_waitq, task,
+ jiffies + (HZ >> 2), RPC_PRIORITY_PRIVILEGED);
+ else
+ rpc_sleep_on_timeout(&tbl->slot_tbl_waitq, task,
+ NULL, jiffies + (HZ >> 2));
+ spin_unlock(&tbl->slot_tbl_lock);
+ return -EAGAIN;
out_sleep:
if (args->sa_privileged)
rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
- NULL, RPC_PRIORITY_PRIVILEGED);
+ RPC_PRIORITY_PRIVILEGED);
else
rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
spin_unlock(&tbl->slot_tbl_lock);
@@ -3060,7 +3082,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
int *opened)
{
struct nfs_server *server = NFS_SERVER(dir);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
struct nfs4_state *res;
struct nfs4_open_createattrs c = {
.label = label,
@@ -3673,7 +3697,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = nfs4_handle_exception(server,
@@ -3715,7 +3741,9 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = _nfs4_lookup_root(server, fhandle, info);
@@ -3942,7 +3970,9 @@ static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label,
struct inode *inode)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode);
@@ -4065,7 +4095,9 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
const struct qstr *name, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
struct rpc_clnt *client = *clnt;
int err;
do {
@@ -4169,7 +4201,9 @@ static int _nfs4_proc_lookupp(struct inode *inode,
static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
@@ -4216,7 +4250,9 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = _nfs4_proc_access(inode, entry);
@@ -4271,7 +4307,9 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
static int nfs4_proc_readlink(struct inode *inode, struct page *page,
unsigned int pgbase, unsigned int pglen)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = _nfs4_proc_readlink(inode, page, pgbase, pglen);
@@ -4347,7 +4385,9 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype)
static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
struct inode *inode = d_inode(dentry);
int err;
@@ -4368,7 +4408,9 @@ static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
@@ -4527,7 +4569,9 @@ out:
static int nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = nfs4_handle_exception(NFS_SERVER(inode),
@@ -4634,7 +4678,9 @@ out:
static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
struct page *page, unsigned int len, struct iattr *sattr)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
struct nfs4_label l, *label = NULL;
int err;
@@ -4673,7 +4719,9 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
struct iattr *sattr)
{
struct nfs_server *server = NFS_SERVER(dir);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
struct nfs4_label l, *label = NULL;
int err;
@@ -4733,7 +4781,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = _nfs4_proc_readdir(dentry, cred, cookie,
@@ -4784,7 +4834,9 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
struct iattr *sattr, dev_t rdev)
{
struct nfs_server *server = NFS_SERVER(dir);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
struct nfs4_label l, *label = NULL;
int err;
@@ -4826,7 +4878,9 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = nfs4_handle_exception(server,
@@ -4857,7 +4911,9 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
unsigned long now = jiffies;
int err;
@@ -4919,7 +4975,9 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *pathconf)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
@@ -5488,7 +5546,9 @@ out_free:
static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
ssize_t ret;
do {
ret = __nfs4_get_acl_uncached(inode, buf, buflen);
@@ -5622,7 +5682,9 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
static int nfs4_get_security_label(struct inode *inode, void *buf,
size_t buflen)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
@@ -6263,7 +6325,9 @@ out:
static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
@@ -6827,6 +6891,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
struct nfs4_exception exception = {
.state = state,
.inode = state->inode,
+ .interruptible = true,
};
int err;
@@ -7240,7 +7305,9 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
struct nfs4_fs_locations *fs_locations,
struct page *page)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = _nfs4_proc_fs_locations(client, dir, name,
@@ -7383,7 +7450,9 @@ int nfs4_proc_get_locations(struct inode *inode,
struct nfs_client *clp = server->nfs_client;
const struct nfs4_mig_recovery_ops *ops =
clp->cl_mvops->mig_recovery_ops;
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int status;
dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
@@ -7507,7 +7576,9 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
struct nfs_client *clp = server->nfs_client;
const struct nfs4_mig_recovery_ops *ops =
clp->cl_mvops->mig_recovery_ops;
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int status;
dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
@@ -7573,7 +7644,9 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
struct nfs4_secinfo_flavors *flavors)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = -NFS4ERR_WRONGSEC;
@@ -9263,7 +9336,9 @@ static int
nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
/* first try using integrity protection */
@@ -9430,7 +9505,9 @@ static int nfs41_test_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
const struct cred *cred)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
int err;
do {
err = _nfs41_test_stateid(server, stateid, cred);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 3de36479ed7a..e2e3c4f04d3e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -159,6 +159,10 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
/* Sustain the lease, even if it's empty. If the clientid4
* goes stale it's of no use for trunking discovery. */
nfs4_schedule_state_renewal(*result);
+
+ /* If the client state need to recover, do it. */
+ if (clp->cl_state)
+ nfs4_schedule_state_manager(clp);
}
out:
return status;
@@ -2346,8 +2350,7 @@ static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
{
/* FIXME: For now, we destroy all layouts. */
pnfs_destroy_all_layouts(clp);
- /* FIXME: For now, we test all delegations+open state+locks. */
- nfs41_handle_some_state_revoked(clp);
+ nfs_test_expired_all_delegations(clp);
dprintk("%s: Recallable state revoked on server %s!\n", __func__,
clp->cl_hostname);
}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e9f39fa5964b..6ec30014a439 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -16,8 +16,8 @@
#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
-#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>
@@ -47,7 +47,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->req = nfs_list_entry(mirror->pg_list.next);
hdr->inode = desc->pg_inode;
- hdr->cred = hdr->req->wb_context->cred;
+ hdr->cred = nfs_req_openctx(hdr->req)->cred;
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = mirror->pg_count;
hdr->io_completion = desc->pg_io_completion;
@@ -295,25 +295,13 @@ out:
nfs_release_request(head);
}
-/**
- * nfs_create_request - Create an NFS read/write request.
- * @ctx: open context to use
- * @page: page to write
- * @last: last nfs request created for this page group or NULL if head
- * @offset: starting offset within the page for the write
- * @count: number of bytes to read/write
- *
- * The page must be locked by the caller. This makes sure we never
- * create two different requests for the same page.
- * User should ensure it is safe to sleep in this function.
- */
-struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct page *page,
- struct nfs_page *last, unsigned int offset,
+static struct nfs_page *
+__nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
+ unsigned int pgbase, unsigned int offset,
unsigned int count)
{
struct nfs_page *req;
- struct nfs_lock_context *l_ctx;
+ struct nfs_open_context *ctx = l_ctx->open_context;
if (test_bit(NFS_CONTEXT_BAD, &ctx->flags))
return ERR_PTR(-EBADF);
@@ -322,13 +310,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
if (req == NULL)
return ERR_PTR(-ENOMEM);
- /* get lock context early so we can deal with alloc failures */
- l_ctx = nfs_get_lock_context(ctx);
- if (IS_ERR(l_ctx)) {
- nfs_page_free(req);
- return ERR_CAST(l_ctx);
- }
req->wb_lock_context = l_ctx;
+ refcount_inc(&l_ctx->count);
atomic_inc(&l_ctx->io_count);
/* Initialize the request struct. Initially, we assume a
@@ -340,15 +323,59 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
get_page(page);
}
req->wb_offset = offset;
- req->wb_pgbase = offset;
+ req->wb_pgbase = pgbase;
req->wb_bytes = count;
- req->wb_context = get_nfs_open_context(ctx);
kref_init(&req->wb_kref);
- nfs_page_group_init(req, last);
+ req->wb_nio = 0;
return req;
}
/**
+ * nfs_create_request - Create an NFS read/write request.
+ * @ctx: open context to use
+ * @page: page to write
+ * @offset: starting offset within the page for the write
+ * @count: number of bytes to read/write
+ *
+ * The page must be locked by the caller. This makes sure we never
+ * create two different requests for the same page.
+ * User should ensure it is safe to sleep in this function.
+ */
+struct nfs_page *
+nfs_create_request(struct nfs_open_context *ctx, struct page *page,
+ unsigned int offset, unsigned int count)
+{
+ struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx);
+ struct nfs_page *ret;
+
+ if (IS_ERR(l_ctx))
+ return ERR_CAST(l_ctx);
+ ret = __nfs_create_request(l_ctx, page, offset, offset, count);
+ if (!IS_ERR(ret))
+ nfs_page_group_init(ret, NULL);
+ nfs_put_lock_context(l_ctx);
+ return ret;
+}
+
+static struct nfs_page *
+nfs_create_subreq(struct nfs_page *req, struct nfs_page *last,
+ unsigned int pgbase, unsigned int offset,
+ unsigned int count)
+{
+ struct nfs_page *ret;
+
+ ret = __nfs_create_request(req->wb_lock_context, req->wb_page,
+ pgbase, offset, count);
+ if (!IS_ERR(ret)) {
+ nfs_lock_request(ret);
+ ret->wb_index = req->wb_index;
+ nfs_page_group_init(ret, last);
+ ret->wb_nio = req->wb_nio;
+ }
+ return ret;
+}
+
+/**
* nfs_unlock_request - Unlock request and wake up sleepers.
* @req: pointer to request
*/
@@ -386,8 +413,8 @@ void nfs_unlock_and_release_request(struct nfs_page *req)
static void nfs_clear_request(struct nfs_page *req)
{
struct page *page = req->wb_page;
- struct nfs_open_context *ctx = req->wb_context;
struct nfs_lock_context *l_ctx = req->wb_lock_context;
+ struct nfs_open_context *ctx;
if (page != NULL) {
put_page(page);
@@ -396,16 +423,13 @@ static void nfs_clear_request(struct nfs_page *req)
if (l_ctx != NULL) {
if (atomic_dec_and_test(&l_ctx->io_count)) {
wake_up_var(&l_ctx->io_count);
+ ctx = l_ctx->open_context;
if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
}
nfs_put_lock_context(l_ctx);
req->wb_lock_context = NULL;
}
- if (ctx != NULL) {
- put_nfs_open_context(ctx);
- req->wb_context = NULL;
- }
}
/**
@@ -550,7 +574,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
hdr->args.pgbase = req->wb_pgbase;
hdr->args.pages = hdr->page_array.pagevec;
hdr->args.count = count;
- hdr->args.context = get_nfs_open_context(req->wb_context);
+ hdr->args.context = get_nfs_open_context(nfs_req_openctx(req));
hdr->args.lock_context = req->wb_lock_context;
hdr->args.stable = NFS_UNSTABLE;
switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
@@ -698,6 +722,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_mirrors_dynamic = NULL;
desc->pg_mirrors = desc->pg_mirrors_static;
nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize);
+ desc->pg_maxretrans = 0;
}
/**
@@ -906,9 +931,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
struct file_lock_context *flctx;
if (prev) {
- if (!nfs_match_open_context(req->wb_context, prev->wb_context))
+ if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev)))
return false;
- flctx = d_inode(req->wb_context->dentry)->i_flctx;
+ flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx;
if (flctx != NULL &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock)) &&
@@ -957,6 +982,15 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
return 0;
mirror->pg_base = req->wb_pgbase;
}
+
+ if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) {
+ if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR)
+ desc->pg_error = -ETIMEDOUT;
+ else
+ desc->pg_error = -EIO;
+ return 0;
+ }
+
if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
nfs_list_move_request(req, &mirror->pg_list);
@@ -1049,14 +1083,10 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
pgbase += subreq->wb_bytes;
if (bytes_left) {
- subreq = nfs_create_request(req->wb_context,
- req->wb_page,
- subreq, pgbase, bytes_left);
+ subreq = nfs_create_subreq(req, subreq, pgbase,
+ offset, bytes_left);
if (IS_ERR(subreq))
goto err_ptr;
- nfs_lock_request(subreq);
- subreq->wb_offset = offset;
- subreq->wb_index = req->wb_index;
}
} while (bytes_left > 0);
@@ -1158,19 +1188,14 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
lastreq = lastreq->wb_this_page)
;
- dupreq = nfs_create_request(req->wb_context,
- req->wb_page, lastreq, pgbase, bytes);
+ dupreq = nfs_create_subreq(req, lastreq,
+ pgbase, offset, bytes);
+ nfs_page_group_unlock(req);
if (IS_ERR(dupreq)) {
- nfs_page_group_unlock(req);
desc->pg_error = PTR_ERR(dupreq);
goto out_failed;
}
-
- nfs_lock_request(dupreq);
- nfs_page_group_unlock(req);
- dupreq->wb_offset = offset;
- dupreq->wb_index = req->wb_index;
} else
dupreq = req;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 7066cd7c7aff..83722e936b4a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2436,7 +2436,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
req_offset(req),
rd_size,
IOMODE_READ,
@@ -2463,7 +2463,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
pnfs_generic_pg_check_range(pgio, req);
if (pgio->pg_lseg == NULL) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- req->wb_context,
+ nfs_req_openctx(req),
req_offset(req),
wb_size,
IOMODE_RW,
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c0420b979d88..f15609c003d8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -459,7 +459,7 @@ static inline bool
pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo, u32 ds_commit_idx)
{
- struct inode *inode = d_inode(req->wb_context->dentry);
+ struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (lseg == NULL || ld->mark_request_commit == NULL)
@@ -471,7 +471,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
static inline bool
pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
- struct inode *inode = d_inode(req->wb_context->dentry);
+ struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (ld == NULL || ld->clear_request_commit == NULL)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 1d95a60b2586..c799e540ed1e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
static void nfs_readpage_release(struct nfs_page *req)
{
- struct inode *inode = d_inode(req->wb_context->dentry);
+ struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode), req->wb_bytes,
@@ -118,7 +118,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
- new = nfs_create_request(ctx, page, NULL, 0, len);
+ new = nfs_create_request(ctx, page, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
return PTR_ERR(new);
@@ -363,7 +363,7 @@ readpage_async_filler(void *data, struct page *page)
if (len == 0)
return nfs_return_empty_page(page);
- new = nfs_create_request(desc->ctx, page, NULL, 0, len);
+ new = nfs_create_request(desc->ctx, page, 0, len);
if (IS_ERR(new))
goto out_error;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 450ae77d19bf..d6c687419a81 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -78,7 +78,7 @@
enum {
/* Mount options that take no arguments */
- Opt_soft, Opt_hard,
+ Opt_soft, Opt_softerr, Opt_hard,
Opt_posix, Opt_noposix,
Opt_cto, Opt_nocto,
Opt_ac, Opt_noac,
@@ -125,6 +125,7 @@ static const match_table_t nfs_mount_option_tokens = {
{ Opt_sloppy, "sloppy" },
{ Opt_soft, "soft" },
+ { Opt_softerr, "softerr" },
{ Opt_hard, "hard" },
{ Opt_deprecated, "intr" },
{ Opt_deprecated, "nointr" },
@@ -628,7 +629,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
const char *str;
const char *nostr;
} nfs_info[] = {
- { NFS_MOUNT_SOFT, ",soft", ",hard" },
+ { NFS_MOUNT_SOFT, ",soft", "" },
+ { NFS_MOUNT_SOFTERR, ",softerr", "" },
{ NFS_MOUNT_POSIX, ",posix", "" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
@@ -658,6 +660,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ);
if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults)
seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);
+ if (!(nfss->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)))
+ seq_puts(m, ",hard");
for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
if (nfss->flags & nfs_infop->flag)
seq_puts(m, nfs_infop->str);
@@ -1239,10 +1243,15 @@ static int nfs_parse_mount_options(char *raw,
*/
case Opt_soft:
mnt->flags |= NFS_MOUNT_SOFT;
+ mnt->flags &= ~NFS_MOUNT_SOFTERR;
break;
- case Opt_hard:
+ case Opt_softerr:
+ mnt->flags |= NFS_MOUNT_SOFTERR;
mnt->flags &= ~NFS_MOUNT_SOFT;
break;
+ case Opt_hard:
+ mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR);
+ break;
case Opt_posix:
mnt->flags |= NFS_MOUNT_POSIX;
break;
@@ -2476,6 +2485,21 @@ static int nfs_compare_super_address(struct nfs_server *server1,
return 1;
}
+static int nfs_compare_userns(const struct nfs_server *old,
+ const struct nfs_server *new)
+{
+ const struct user_namespace *oldns = &init_user_ns;
+ const struct user_namespace *newns = &init_user_ns;
+
+ if (old->client && old->client->cl_cred)
+ oldns = old->client->cl_cred->user_ns;
+ if (new->client && new->client->cl_cred)
+ newns = new->client->cl_cred->user_ns;
+ if (oldns != newns)
+ return 0;
+ return 1;
+}
+
static int nfs_compare_super(struct super_block *sb, void *data)
{
struct nfs_sb_mountdata *sb_mntdata = data;
@@ -2489,6 +2513,8 @@ static int nfs_compare_super(struct super_block *sb, void *data)
return 0;
if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
return 0;
+ if (!nfs_compare_userns(old, server))
+ return 0;
return nfs_compare_mount_options(sb, server, mntflags);
}
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 06eb44b47885..25ba299fdac2 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -26,8 +26,9 @@
* and straight-forward than readdir caching.
*/
-static int nfs_symlink_filler(struct inode *inode, struct page *page)
+static int nfs_symlink_filler(void *data, struct page *page)
{
+ struct inode *inode = data;
int error;
error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
@@ -65,8 +66,8 @@ static const char *nfs_get_link(struct dentry *dentry,
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
return err;
- page = read_cache_page(&inode->i_data, 0,
- (filler_t *)nfs_symlink_filler, inode);
+ page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
+ inode);
if (IS_ERR(page))
return ERR_CAST(page);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f3ebabaa291d..bc5bb9323412 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -244,6 +244,12 @@ static void nfs_set_pageerror(struct address_space *mapping)
nfs_zap_mapping(mapping->host, mapping);
}
+static void nfs_mapping_set_error(struct page *page, int error)
+{
+ SetPageError(page);
+ mapping_set_error(page_file_mapping(page), error);
+}
+
/*
* nfs_page_group_search_locked
* @head - head request of page group
@@ -582,11 +588,10 @@ release_request:
return ERR_PTR(ret);
}
-static void nfs_write_error_remove_page(struct nfs_page *req)
+static void nfs_write_error(struct nfs_page *req, int error)
{
+ nfs_mapping_set_error(req->wb_page, error);
nfs_end_page_writeback(req);
- generic_error_remove_page(page_file_mapping(req->wb_page),
- req->wb_page);
nfs_release_request(req);
}
@@ -609,6 +614,7 @@ nfs_error_is_fatal_on_server(int err)
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct page *page)
{
+ struct address_space *mapping;
struct nfs_page *req;
int ret = 0;
@@ -622,19 +628,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
nfs_set_page_writeback(page);
WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
- ret = req->wb_context->error;
/* If there is a fatal error that covers this write, just exit */
- if (nfs_error_is_fatal_on_server(ret))
+ ret = 0;
+ mapping = page_file_mapping(page);
+ if (test_bit(AS_ENOSPC, &mapping->flags) ||
+ test_bit(AS_EIO, &mapping->flags))
goto out_launder;
- ret = 0;
if (!nfs_pageio_add_request(pgio, req)) {
ret = pgio->pg_error;
/*
* Remove the problematic req upon fatal errors on the server
*/
if (nfs_error_is_fatal(ret)) {
- nfs_context_set_write_error(req->wb_context, ret);
if (nfs_error_is_fatal_on_server(ret))
goto out_launder;
} else
@@ -646,8 +652,8 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
out:
return ret;
out_launder:
- nfs_write_error_remove_page(req);
- return ret;
+ nfs_write_error(req, ret);
+ return 0;
}
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
@@ -958,7 +964,8 @@ static void
nfs_clear_request_commit(struct nfs_page *req)
{
if (test_bit(PG_CLEAN, &req->wb_flags)) {
- struct inode *inode = d_inode(req->wb_context->dentry);
+ struct nfs_open_context *ctx = nfs_req_openctx(req);
+ struct inode *inode = d_inode(ctx->dentry);
struct nfs_commit_info cinfo;
nfs_init_cinfo_from_inode(&cinfo, inode);
@@ -999,10 +1006,12 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
(hdr->good_bytes < bytes)) {
nfs_set_pageerror(page_file_mapping(req->wb_page));
- nfs_context_set_write_error(req->wb_context, hdr->error);
+ nfs_mapping_set_error(req->wb_page, hdr->error);
goto remove_req;
}
if (nfs_write_need_commit(hdr)) {
+ /* Reset wb_nio, since the write was successful. */
+ req->wb_nio = 0;
memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo,
hdr->pgio_mirror_idx);
@@ -1136,6 +1145,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
req->wb_bytes = end - req->wb_offset;
else
req->wb_bytes = rqend - req->wb_offset;
+ req->wb_nio = 0;
return req;
out_flushme:
/*
@@ -1165,7 +1175,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
req = nfs_try_to_update_request(inode, page, offset, bytes);
if (req != NULL)
goto out;
- req = nfs_create_request(ctx, page, NULL, offset, bytes);
+ req = nfs_create_request(ctx, page, offset, bytes);
if (IS_ERR(req))
goto out;
nfs_inode_add_request(inode, req);
@@ -1210,7 +1220,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
return 0;
l_ctx = req->wb_lock_context;
do_flush = req->wb_page != page ||
- !nfs_match_open_context(req->wb_context, ctx);
+ !nfs_match_open_context(nfs_req_openctx(req), ctx);
if (l_ctx && flctx &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock))) {
@@ -1410,8 +1420,10 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
+ /* Bump the transmission count */
+ req->wb_nio++;
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
+ set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1423,14 +1435,10 @@ static void nfs_async_write_error(struct list_head *head, int error)
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- if (nfs_error_is_fatal(error)) {
- nfs_context_set_write_error(req->wb_context, error);
- if (nfs_error_is_fatal_on_server(error)) {
- nfs_write_error_remove_page(req);
- continue;
- }
- }
- nfs_redirty_request(req);
+ if (nfs_error_is_fatal(error))
+ nfs_write_error(req, error);
+ else
+ nfs_redirty_request(req);
}
}
@@ -1735,7 +1743,8 @@ void nfs_init_commit(struct nfs_commit_data *data,
struct nfs_commit_info *cinfo)
{
struct nfs_page *first = nfs_list_entry(head->next);
- struct inode *inode = d_inode(first->wb_context->dentry);
+ struct nfs_open_context *ctx = nfs_req_openctx(first);
+ struct inode *inode = d_inode(ctx->dentry);
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -1743,7 +1752,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
list_splice_init(head, &data->pages);
data->inode = inode;
- data->cred = first->wb_context->cred;
+ data->cred = ctx->cred;
data->lseg = lseg; /* reference transferred */
/* only set lwb for pnfs commit */
if (lseg)
@@ -1756,7 +1765,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
/* Note: we always request a commit of the entire inode */
data->args.offset = 0;
data->args.count = 0;
- data->context = get_nfs_open_context(first->wb_context);
+ data->context = get_nfs_open_context(ctx);
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
@@ -1839,14 +1848,15 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
nfs_clear_page_commit(req->wb_page);
dprintk("NFS: commit (%s/%llu %d@%lld)",
- req->wb_context->dentry->d_sb->s_id,
- (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
+ nfs_req_openctx(req)->dentry->d_sb->s_id,
+ (unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)),
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
- nfs_context_set_write_error(req->wb_context, status);
- if (req->wb_page)
+ if (req->wb_page) {
+ nfs_mapping_set_error(req->wb_page, status);
nfs_inode_remove_request(req);
+ }
dprintk_cont(", error = %d\n", status);
goto next;
}
@@ -1863,7 +1873,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* We have a mismatch. Write the page again */
dprintk_cont(" mismatch\n");
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
+ set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
next:
nfs_unlock_and_release_request(req);
/* Latency breaker */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7caa3801ce72..9b93e7a9a26d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -868,6 +868,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
.program = &cb_program,
.version = 1,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+ .cred = current_cred(),
};
struct rpc_clnt *client;
const struct cred *cred;
@@ -1033,7 +1034,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
* the submission code will error out, so we don't need to
* handle that case here.
*/
- if (task->tk_flags & RPC_TASK_KILLED)
+ if (RPC_SIGNALLED(task))
goto need_restart;
return true;
@@ -1086,7 +1087,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);
- if (task->tk_flags & RPC_TASK_KILLED)
+ if (RPC_SIGNALLED(task))
goto need_restart;
out:
return ret;
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 053a4ef3d431..8c0cf1059443 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -46,6 +46,7 @@ struct nlmclnt_initdata {
int noresvport;
struct net *net;
const struct nlmclnt_operations *nlmclnt_ops;
+ const struct cred *cred;
};
/*
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b065ef406770..c9b422dde542 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -70,6 +70,7 @@ struct nlm_host {
struct nsm_handle *h_nsmhandle; /* NSM status handle */
char *h_addrbuf; /* address eyecatcher */
struct net *net; /* host net */
+ const struct cred *h_cred;
char nodename[UNX_MAXNODENAME + 1];
const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */
};
@@ -229,7 +230,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
const u32 version,
const char *hostname,
int noresvport,
- struct net *net);
+ struct net *net,
+ const struct cred *cred);
void nlmclnt_release_host(struct nlm_host *);
struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
const char *hostname,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 40e30376130b..d363d5765cdf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -76,7 +76,6 @@ struct nfs_open_context {
fmode_t mode;
unsigned long flags;
-#define NFS_CONTEXT_ERROR_WRITE (0)
#define NFS_CONTEXT_RESEND_WRITES (1)
#define NFS_CONTEXT_BAD (2)
#define NFS_CONTEXT_UNLOCK (3)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index c827d31298cc..1e78032a174b 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -139,6 +139,16 @@ struct nfs_server {
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
atomic_long_t writeback; /* number of writeback pages */
int flags; /* various flags */
+
+/* The following are for internal use only. Also see uapi/linux/nfs_mount.h */
+#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
+#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000
+#define NFS_MOUNT_NORESVPORT 0x40000
+#define NFS_MOUNT_LEGACY_INTERFACE 0x80000
+#define NFS_MOUNT_LOCAL_FLOCK 0x100000
+#define NFS_MOUNT_LOCAL_FCNTL 0x200000
+#define NFS_MOUNT_SOFTERR 0x400000
+
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */
unsigned int rpages; /* read size (in pages) */
@@ -231,6 +241,9 @@ struct nfs_server {
/* XDR related information */
unsigned int read_hdrsize;
+
+ /* User namespace info */
+ const struct cred *cred;
};
/* Server capabilities */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index ad69430fd0eb..0bbd587fac6a 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -42,7 +42,6 @@ struct nfs_inode;
struct nfs_page {
struct list_head wb_list; /* Defines state of page: */
struct page *wb_page; /* page to read in/write out */
- struct nfs_open_context *wb_context; /* File state context info */
struct nfs_lock_context *wb_lock_context; /* lock context info */
pgoff_t wb_index; /* Offset >> PAGE_SHIFT */
unsigned int wb_offset, /* Offset & ~PAGE_MASK */
@@ -53,6 +52,7 @@ struct nfs_page {
struct nfs_write_verifier wb_verf; /* Commit cookie */
struct nfs_page *wb_this_page; /* list of reqs for this page */
struct nfs_page *wb_head; /* head pointer for req list */
+ unsigned short wb_nio; /* Number of I/O attempts */
};
struct nfs_pageio_descriptor;
@@ -87,7 +87,6 @@ struct nfs_pgio_mirror {
};
struct nfs_pageio_descriptor {
- unsigned char pg_moreio : 1;
struct inode *pg_inode;
const struct nfs_pageio_ops *pg_ops;
const struct nfs_rw_ops *pg_rw_ops;
@@ -105,6 +104,8 @@ struct nfs_pageio_descriptor {
struct nfs_pgio_mirror pg_mirrors_static[1];
struct nfs_pgio_mirror *pg_mirrors_dynamic;
u32 pg_mirror_idx; /* current mirror */
+ unsigned short pg_maxretrans;
+ unsigned char pg_moreio : 1;
};
/* arbitrarily selected limit to number of mirrors */
@@ -114,7 +115,6 @@ struct nfs_pageio_descriptor {
extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
struct page *page,
- struct nfs_page *last,
unsigned int offset,
unsigned int count);
extern void nfs_release_request(struct nfs_page *);
@@ -199,4 +199,10 @@ loff_t req_offset(struct nfs_page *req)
return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset;
}
+static inline struct nfs_open_context *
+nfs_req_openctx(struct nfs_page *req)
+{
+ return req->wb_lock_context->open_context;
+}
+
#endif /* _LINUX_NFS_PAGE_H */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 98bc9883b230..6e8073140a5d 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -50,6 +50,7 @@ struct rpc_clnt {
struct rpc_iostats * cl_metrics; /* per-client statistics */
unsigned int cl_softrtry : 1,/* soft timeouts */
+ cl_softerr : 1,/* Timeouts return errors */
cl_discrtry : 1,/* disconnect before retry */
cl_noretranstimeo: 1,/* No retransmit timeouts */
cl_autobind : 1,/* use getport() */
@@ -71,6 +72,7 @@ struct rpc_clnt {
struct dentry *cl_debugfs; /* debugfs directory */
#endif
struct rpc_xprt_iter cl_xpi;
+ const struct cred *cl_cred;
};
/*
@@ -125,6 +127,7 @@ struct rpc_create_args {
unsigned long flags;
char *client_name;
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
+ const struct cred *cred;
};
struct rpc_add_xprt_test {
@@ -144,6 +147,7 @@ struct rpc_add_xprt_test {
#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7)
#define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8)
#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9)
+#define RPC_CLNT_CREATE_SOFTERR (1UL << 10)
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 52d41d0c1ae1..d0e451868f02 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -35,7 +35,6 @@ struct rpc_wait {
struct list_head list; /* wait queue links */
struct list_head links; /* Links to related tasks */
struct list_head timer_list; /* Timer list */
- unsigned long expires;
};
/*
@@ -62,6 +61,8 @@ struct rpc_task {
struct rpc_wait tk_wait; /* RPC wait */
} u;
+ int tk_rpc_status; /* Result of last RPC operation */
+
/*
* RPC call state
*/
@@ -125,7 +126,6 @@ struct rpc_task_setup {
#define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */
#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */
#define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */
-#define RPC_TASK_KILLED 0x0100 /* task was killed */
#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */
#define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */
#define RPC_TASK_SENT 0x0800 /* message was sent */
@@ -135,7 +135,6 @@ struct rpc_task_setup {
#define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC)
#define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER)
-#define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED)
#define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
#define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN)
#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT)
@@ -146,6 +145,7 @@ struct rpc_task_setup {
#define RPC_TASK_NEED_XMIT 3
#define RPC_TASK_NEED_RECV 4
#define RPC_TASK_MSG_PIN_WAIT 5
+#define RPC_TASK_SIGNALLED 6
#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
@@ -169,6 +169,8 @@ struct rpc_task_setup {
#define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
+#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)
+
/*
* Task priorities.
* Note: if you change these, you must also change
@@ -183,7 +185,6 @@ struct rpc_task_setup {
struct rpc_timer {
struct timer_list timer;
struct list_head list;
- unsigned long expires;
};
/*
@@ -217,6 +218,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
void rpc_put_task(struct rpc_task *);
void rpc_put_task_async(struct rpc_task *);
+void rpc_signal_task(struct rpc_task *);
void rpc_exit_task(struct rpc_task *);
void rpc_exit(struct rpc_task *, int);
void rpc_release_calldata(const struct rpc_call_ops *, void *);
@@ -225,11 +227,19 @@ void rpc_execute(struct rpc_task *);
void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_destroy_wait_queue(struct rpc_wait_queue *);
+unsigned long rpc_task_timeout(const struct rpc_task *task);
+void rpc_sleep_on_timeout(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+ rpc_action action,
+ unsigned long timeout);
void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
rpc_action action);
+void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+ unsigned long timeout,
+ int priority);
void rpc_sleep_on_priority(struct rpc_wait_queue *,
struct rpc_task *,
- rpc_action action,
int priority);
void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
struct rpc_wait_queue *queue,
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3a391544299e..a6d9fce7f20e 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -143,7 +143,7 @@ struct rpc_xprt_ops {
void (*buf_free)(struct rpc_task *task);
void (*prepare_request)(struct rpc_rqst *req);
int (*send_request)(struct rpc_rqst *req);
- void (*set_retrans_timeout)(struct rpc_task *task);
+ void (*wait_for_reply_request)(struct rpc_task *task);
void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*release_request)(struct rpc_task *task);
void (*close)(struct rpc_xprt *xprt);
@@ -378,8 +378,8 @@ xprt_disable_swap(struct rpc_xprt *xprt)
int xprt_register_transport(struct xprt_class *type);
int xprt_unregister_transport(struct xprt_class *type);
int xprt_load_transport(const char *);
-void xprt_set_retrans_timeout_def(struct rpc_task *task);
-void xprt_set_retrans_timeout_rtt(struct rpc_task *task);
+void xprt_wait_for_reply_request_def(struct rpc_task *task);
+void xprt_wait_for_reply_request_rtt(struct rpc_task *task);
void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
void xprt_wait_for_buffer_space(struct rpc_xprt *xprt);
bool xprt_write_space(struct rpc_xprt *xprt);
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 962975b4313f..df9851cb82b2 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -511,6 +511,33 @@ TRACE_EVENT(xprtrdma_marshal,
)
);
+TRACE_EVENT(xprtrdma_marshal_failed,
+ TP_PROTO(const struct rpc_rqst *rqst,
+ int ret
+ ),
+
+ TP_ARGS(rqst, ret),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->ret = ret;
+ ),
+
+ TP_printk("task:%u@%u xid=0x%08x: ret=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->ret
+ )
+);
+
TRACE_EVENT(xprtrdma_post_send,
TP_PROTO(
const struct rpcrdma_req *req,
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index f0a6f0c5549c..ffa3c51dbb1a 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -82,7 +82,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
-TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
TRACE_DEFINE_ENUM(RPC_TASK_SENT);
@@ -97,7 +96,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
{ RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \
{ RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \
{ RPC_TASK_DYNAMIC, "DYNAMIC" }, \
- { RPC_TASK_KILLED, "KILLED" }, \
{ RPC_TASK_SOFT, "SOFT" }, \
{ RPC_TASK_SOFTCONN, "SOFTCONN" }, \
{ RPC_TASK_SENT, "SENT" }, \
@@ -111,6 +109,7 @@ TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
+TRACE_DEFINE_ENUM(RPC_TASK_SIGNALLED);
#define rpc_show_runstate(flags) \
__print_flags(flags, "|", \
@@ -119,7 +118,8 @@ TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
{ (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \
{ (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \
{ (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \
- { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
+ { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }, \
+ { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" })
DECLARE_EVENT_CLASS(rpc_task_running,
@@ -186,7 +186,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
__entry->client_id = task->tk_client ?
task->tk_client->cl_clid : -1;
__entry->task_id = task->tk_pid;
- __entry->timeout = task->tk_timeout;
+ __entry->timeout = rpc_task_timeout(task);
__entry->runstate = task->tk_runstate;
__entry->status = task->tk_status;
__entry->flags = task->tk_flags;
diff --git a/include/uapi/linux/nfs_mount.h b/include/uapi/linux/nfs_mount.h
index e44e00616ab5..e3bcfc6aa3b0 100644
--- a/include/uapi/linux/nfs_mount.h
+++ b/include/uapi/linux/nfs_mount.h
@@ -66,13 +66,4 @@ struct nfs_mount_data {
#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
#define NFS_MOUNT_FLAGMASK 0xFFFF
-/* The following are for internal use only */
-#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
-#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000
-#define NFS_MOUNT_NORESVPORT 0x40000
-#define NFS_MOUNT_LEGACY_INTERFACE 0x80000
-
-#define NFS_MOUNT_LOCAL_FLOCK 0x100000
-#define NFS_MOUNT_LOCAL_FCNTL 0x200000
-
#endif
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 3fd56c0c90ae..4ce42c62458e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -269,6 +269,7 @@ err:
struct gss_upcall_msg {
refcount_t count;
kuid_t uid;
+ const char *service_name;
struct rpc_pipe_msg msg;
struct list_head list;
struct gss_auth *auth;
@@ -316,6 +317,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
gss_put_ctx(gss_msg->ctx);
rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
gss_put_auth(gss_msg->auth);
+ kfree_const(gss_msg->service_name);
kfree(gss_msg);
}
@@ -410,9 +412,12 @@ gss_upcall_callback(struct rpc_task *task)
gss_release_msg(gss_msg);
}
-static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
+static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg,
+ const struct cred *cred)
{
- uid_t uid = from_kuid(&init_user_ns, gss_msg->uid);
+ struct user_namespace *userns = cred->user_ns;
+
+ uid_t uid = from_kuid_munged(userns, gss_msg->uid);
memcpy(gss_msg->databuf, &uid, sizeof(uid));
gss_msg->msg.data = gss_msg->databuf;
gss_msg->msg.len = sizeof(uid);
@@ -420,17 +425,31 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf));
}
+static ssize_t
+gss_v0_upcall(struct file *file, struct rpc_pipe_msg *msg,
+ char __user *buf, size_t buflen)
+{
+ struct gss_upcall_msg *gss_msg = container_of(msg,
+ struct gss_upcall_msg,
+ msg);
+ if (msg->copied == 0)
+ gss_encode_v0_msg(gss_msg, file->f_cred);
+ return rpc_pipe_generic_upcall(file, msg, buf, buflen);
+}
+
static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
const char *service_name,
- const char *target_name)
+ const char *target_name,
+ const struct cred *cred)
{
+ struct user_namespace *userns = cred->user_ns;
struct gss_api_mech *mech = gss_msg->auth->mech;
char *p = gss_msg->databuf;
size_t buflen = sizeof(gss_msg->databuf);
int len;
len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
- from_kuid(&init_user_ns, gss_msg->uid));
+ from_kuid_munged(userns, gss_msg->uid));
buflen -= len;
p += len;
gss_msg->msg.len = len;
@@ -491,6 +510,25 @@ out_overflow:
return -ENOMEM;
}
+static ssize_t
+gss_v1_upcall(struct file *file, struct rpc_pipe_msg *msg,
+ char __user *buf, size_t buflen)
+{
+ struct gss_upcall_msg *gss_msg = container_of(msg,
+ struct gss_upcall_msg,
+ msg);
+ int err;
+ if (msg->copied == 0) {
+ err = gss_encode_v1_msg(gss_msg,
+ gss_msg->service_name,
+ gss_msg->auth->target_name,
+ file->f_cred);
+ if (err)
+ return err;
+ }
+ return rpc_pipe_generic_upcall(file, msg, buf, buflen);
+}
+
static struct gss_upcall_msg *
gss_alloc_msg(struct gss_auth *gss_auth,
kuid_t uid, const char *service_name)
@@ -513,16 +551,14 @@ gss_alloc_msg(struct gss_auth *gss_auth,
refcount_set(&gss_msg->count, 1);
gss_msg->uid = uid;
gss_msg->auth = gss_auth;
- switch (vers) {
- case 0:
- gss_encode_v0_msg(gss_msg);
- break;
- default:
- err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
- if (err)
+ kref_get(&gss_auth->kref);
+ if (service_name) {
+ gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS);
+ if (!gss_msg->service_name) {
+ err = -ENOMEM;
goto err_put_pipe_version;
+ }
}
- kref_get(&gss_auth->kref);
return gss_msg;
err_put_pipe_version:
put_pipe_version(gss_auth->net);
@@ -581,8 +617,8 @@ gss_refresh_upcall(struct rpc_task *task)
/* XXX: warning on the first, under the assumption we
* shouldn't normally hit this case on a refresh. */
warn_gssd();
- task->tk_timeout = 15*HZ;
- rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
+ rpc_sleep_on_timeout(&pipe_version_rpc_waitqueue,
+ task, NULL, jiffies + (15 * HZ));
err = -EAGAIN;
goto out;
}
@@ -595,7 +631,6 @@ gss_refresh_upcall(struct rpc_task *task)
if (gss_cred->gc_upcall != NULL)
rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
- task->tk_timeout = 0;
gss_cred->gc_upcall = gss_msg;
/* gss_upcall_callback will release the reference to gss_upcall_msg */
refcount_inc(&gss_msg->count);
@@ -707,7 +742,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
goto err;
}
- uid = make_kuid(&init_user_ns, id);
+ uid = make_kuid(current_user_ns(), id);
if (!uid_valid(uid)) {
err = -EINVAL;
goto err;
@@ -2116,7 +2151,7 @@ static const struct rpc_credops gss_nullops = {
};
static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
- .upcall = rpc_pipe_generic_upcall,
+ .upcall = gss_v0_upcall,
.downcall = gss_pipe_downcall,
.destroy_msg = gss_pipe_destroy_msg,
.open_pipe = gss_pipe_open_v0,
@@ -2124,7 +2159,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
};
static const struct rpc_pipe_ops gss_upcall_ops_v1 = {
- .upcall = rpc_pipe_generic_upcall,
+ .upcall = gss_v1_upcall,
.downcall = gss_pipe_downcall,
.destroy_msg = gss_pipe_destroy_msg,
.open_pipe = gss_pipe_open_v1,
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index d4018e5a24c5..e7df1f782b2e 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -107,6 +107,8 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
__be32 *p, *cred_len, *gidarr_len;
int i;
struct group_info *gi = cred->cr_cred->group_info;
+ struct user_namespace *userns = clnt->cl_cred ?
+ clnt->cl_cred->user_ns : &init_user_ns;
/* Credential */
@@ -122,14 +124,13 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
p = xdr_reserve_space(xdr, 3 * sizeof(*p));
if (!p)
goto marshal_failed;
- *p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid));
- *p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid));
+ *p++ = cpu_to_be32(from_kuid_munged(userns, cred->cr_cred->fsuid));
+ *p++ = cpu_to_be32(from_kgid_munged(userns, cred->cr_cred->fsgid));
gidarr_len = p++;
if (gi)
for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
- *p++ = cpu_to_be32(from_kgid(&init_user_ns,
- gi->gid[i]));
+ *p++ = cpu_to_be32(from_kgid_munged(userns, gi->gid[i]));
*gidarr_len = cpu_to_be32(p - gidarr_len - 1);
*cred_len = cpu_to_be32((p - cred_len - 1) << 2);
p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8ff11dc98d7f..c1f1afabd024 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -394,6 +394,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
if (err)
goto out_no_clid;
+ clnt->cl_cred = get_cred(args->cred);
clnt->cl_procinfo = version->procs;
clnt->cl_maxproc = version->nrprocs;
clnt->cl_prog = args->prognumber ? : program->number;
@@ -439,6 +440,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
out_no_path:
rpc_free_iostats(clnt->cl_metrics);
out_no_stats:
+ put_cred(clnt->cl_cred);
rpc_free_clid(clnt);
out_no_clid:
kfree(clnt);
@@ -484,8 +486,11 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
}
clnt->cl_softrtry = 1;
- if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
+ if (args->flags & (RPC_CLNT_CREATE_HARDRTRY|RPC_CLNT_CREATE_SOFTERR)) {
clnt->cl_softrtry = 0;
+ if (args->flags & RPC_CLNT_CREATE_SOFTERR)
+ clnt->cl_softerr = 1;
+ }
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
@@ -623,10 +628,12 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
/* Turn off autobind on clones */
new->cl_autobind = 0;
new->cl_softrtry = clnt->cl_softrtry;
+ new->cl_softerr = clnt->cl_softerr;
new->cl_noretranstimeo = clnt->cl_noretranstimeo;
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_principal = clnt->cl_principal;
+ new->cl_cred = get_cred(clnt->cl_cred);
return new;
out_err:
@@ -648,6 +655,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = clnt->cl_auth->au_flavor,
+ .cred = clnt->cl_cred,
};
return __rpc_clone_client(&args, clnt);
}
@@ -669,6 +677,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = flavor,
+ .cred = clnt->cl_cred,
};
return __rpc_clone_client(&args, clnt);
}
@@ -827,14 +836,8 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
* Spin lock all_tasks to prevent changes...
*/
spin_lock(&clnt->cl_lock);
- list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
- if (!RPC_IS_ACTIVATED(rovr))
- continue;
- if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
- rovr->tk_flags |= RPC_TASK_KILLED;
- rpc_exit(rovr, -EIO);
- }
- }
+ list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
+ rpc_signal_task(rovr);
spin_unlock(&clnt->cl_lock);
}
EXPORT_SYMBOL_GPL(rpc_killall_tasks);
@@ -882,6 +885,7 @@ rpc_free_client(struct rpc_clnt *clnt)
xprt_put(rcu_dereference_raw(clnt->cl_xprt));
xprt_iter_destroy(&clnt->cl_xpi);
rpciod_down();
+ put_cred(clnt->cl_cred);
rpc_free_clid(clnt);
kfree(clnt);
return parent;
@@ -946,6 +950,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
.prognumber = program->number,
.version = vers,
.authflavor = old->cl_auth->au_flavor,
+ .cred = old->cl_cred,
};
struct rpc_clnt *clnt;
int err;
@@ -1007,6 +1012,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
atomic_inc(&clnt->cl_count);
if (clnt->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
+ if (clnt->cl_softerr)
+ task->tk_flags |= RPC_TASK_TIMEOUT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
if (atomic_read(&clnt->cl_swapper))
@@ -1470,22 +1477,14 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
}
EXPORT_SYMBOL_GPL(rpc_force_rebind);
-/*
- * Restart an (async) RPC call from the call_prepare state.
- * Usually called from within the exit handler.
- */
-int
-rpc_restart_call_prepare(struct rpc_task *task)
+static int
+__rpc_restart_call(struct rpc_task *task, void (*action)(struct rpc_task *))
{
- if (RPC_ASSASSINATED(task))
- return 0;
- task->tk_action = call_start;
task->tk_status = 0;
- if (task->tk_ops->rpc_call_prepare != NULL)
- task->tk_action = rpc_prepare_task;
+ task->tk_rpc_status = 0;
+ task->tk_action = action;
return 1;
}
-EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
/*
* Restart an (async) RPC call. Usually called from within the
@@ -1494,14 +1493,23 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
int
rpc_restart_call(struct rpc_task *task)
{
- if (RPC_ASSASSINATED(task))
- return 0;
- task->tk_action = call_start;
- task->tk_status = 0;
- return 1;
+ return __rpc_restart_call(task, call_start);
}
EXPORT_SYMBOL_GPL(rpc_restart_call);
+/*
+ * Restart an (async) RPC call from the call_prepare state.
+ * Usually called from within the exit handler.
+ */
+int
+rpc_restart_call_prepare(struct rpc_task *task)
+{
+ if (task->tk_ops->rpc_call_prepare != NULL)
+ return __rpc_restart_call(task, rpc_prepare_task);
+ return rpc_restart_call(task);
+}
+EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
+
const char
*rpc_proc_name(const struct rpc_task *task)
{
@@ -1516,6 +1524,19 @@ const char
return "no proc";
}
+static void
+__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status)
+{
+ task->tk_rpc_status = rpc_status;
+ rpc_exit(task, tk_status);
+}
+
+static void
+rpc_call_rpcerror(struct rpc_task *task, int status)
+{
+ __rpc_call_rpcerror(task, status, status);
+}
+
/*
* 0. Initial state
*
@@ -1580,7 +1601,7 @@ call_reserveresult(struct rpc_task *task)
printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
__func__, status);
- rpc_exit(task, -EIO);
+ rpc_call_rpcerror(task, -EIO);
return;
}
@@ -1608,7 +1629,7 @@ call_reserveresult(struct rpc_task *task)
__func__, status);
break;
}
- rpc_exit(task, status);
+ rpc_call_rpcerror(task, status);
}
/*
@@ -1676,7 +1697,7 @@ call_refreshresult(struct rpc_task *task)
}
dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
task->tk_pid, __func__, status);
- rpc_exit(task, status);
+ rpc_call_rpcerror(task, status);
}
/*
@@ -1727,7 +1748,7 @@ call_allocate(struct rpc_task *task)
if (status == 0)
return;
if (status != -ENOMEM) {
- rpc_exit(task, status);
+ rpc_call_rpcerror(task, status);
return;
}
@@ -1793,10 +1814,17 @@ call_encode(struct rpc_task *task)
rpc_delay(task, HZ >> 4);
break;
case -EKEYEXPIRED:
- task->tk_action = call_refresh;
+ if (!task->tk_cred_retry) {
+ rpc_exit(task, task->tk_status);
+ } else {
+ task->tk_action = call_refresh;
+ task->tk_cred_retry--;
+ dprintk("RPC: %5u %s: retry refresh creds\n",
+ task->tk_pid, __func__);
+ }
break;
default:
- rpc_exit(task, task->tk_status);
+ rpc_call_rpcerror(task, task->tk_status);
}
return;
} else {
@@ -1857,7 +1885,6 @@ call_bind(struct rpc_task *task)
if (!xprt_prepare_transmit(task))
return;
- task->tk_timeout = xprt->bind_timeout;
xprt->ops->rpcbind(task);
}
@@ -1938,7 +1965,7 @@ call_bind_status(struct rpc_task *task)
task->tk_pid, -task->tk_status);
}
- rpc_exit(task, status);
+ rpc_call_rpcerror(task, status);
return;
retry_timeout:
@@ -1973,7 +2000,7 @@ call_connect(struct rpc_task *task)
if (task->tk_status < 0)
return;
if (task->tk_flags & RPC_TASK_NOCONNECT) {
- rpc_exit(task, -ENOTCONN);
+ rpc_call_rpcerror(task, -ENOTCONN);
return;
}
if (!xprt_prepare_transmit(task))
@@ -2033,7 +2060,7 @@ call_connect_status(struct rpc_task *task)
task->tk_action = call_transmit;
return;
}
- rpc_exit(task, status);
+ rpc_call_rpcerror(task, status);
return;
out_retry:
/* Check for timeouts before looping back to call_bind */
@@ -2118,7 +2145,7 @@ call_transmit_status(struct rpc_task *task)
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,
task->tk_status);
- rpc_exit(task, task->tk_status);
+ rpc_call_rpcerror(task, task->tk_status);
return;
}
/* fall through */
@@ -2282,7 +2309,7 @@ call_status(struct rpc_task *task)
rpc_check_timeout(task);
return;
out_exit:
- rpc_exit(task, status);
+ rpc_call_rpcerror(task, status);
}
static bool
@@ -2306,29 +2333,40 @@ rpc_check_timeout(struct rpc_task *task)
task->tk_timeouts++;
if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) {
- rpc_exit(task, -ETIMEDOUT);
+ rpc_call_rpcerror(task, -ETIMEDOUT);
return;
}
if (RPC_IS_SOFT(task)) {
+ /*
+ * Once a "no retrans timeout" soft tasks (a.k.a NFSv4) has
+ * been sent, it should time out only if the transport
+ * connection gets terminally broken.
+ */
+ if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) &&
+ rpc_check_connected(task->tk_rqstp))
+ return;
+
if (clnt->cl_chatty) {
- printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
+ pr_notice_ratelimited(
+ "%s: server %s not responding, timed out\n",
clnt->cl_program->name,
task->tk_xprt->servername);
}
if (task->tk_flags & RPC_TASK_TIMEOUT)
- rpc_exit(task, -ETIMEDOUT);
+ rpc_call_rpcerror(task, -ETIMEDOUT);
else
- rpc_exit(task, -EIO);
+ __rpc_call_rpcerror(task, -EIO, -ETIMEDOUT);
return;
}
if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
task->tk_flags |= RPC_CALL_MAJORSEEN;
if (clnt->cl_chatty) {
- printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
- clnt->cl_program->name,
- task->tk_xprt->servername);
+ pr_notice_ratelimited(
+ "%s: server %s not responding, still trying\n",
+ clnt->cl_program->name,
+ task->tk_xprt->servername);
}
}
rpc_force_rebind(clnt);
@@ -2358,7 +2396,7 @@ call_decode(struct rpc_task *task)
if (task->tk_flags & RPC_CALL_MAJORSEEN) {
if (clnt->cl_chatty) {
- printk(KERN_NOTICE "%s: server %s OK\n",
+ pr_notice_ratelimited("%s: server %s OK\n",
clnt->cl_program->name,
task->tk_xprt->servername);
}
@@ -2881,7 +2919,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
task->tk_pid, task->tk_flags, task->tk_status,
- clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
+ clnt, task->tk_rqstp, rpc_task_timeout(task), task->tk_ops,
clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
task->tk_action, rpc_waitq);
}
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 19bb356230ed..95ebd76b132d 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -33,7 +33,7 @@ tasks_show(struct seq_file *f, void *v)
seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n",
task->tk_pid, task->tk_flags, task->tk_status,
- clnt->cl_clid, xid, task->tk_timeout, task->tk_ops,
+ clnt->cl_clid, xid, rpc_task_timeout(task), task->tk_ops,
clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
task->tk_action, rpc_waitq);
return 0;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 41a971ac1c63..2277b7cdad27 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -240,6 +240,7 @@ static int rpcb_create_local_unix(struct net *net)
.program = &rpcb_program,
.version = RPCBVERS_2,
.authflavor = RPC_AUTH_NULL,
+ .cred = current_cred(),
/*
* We turn off the idle timeout to prevent the kernel
* from automatically disconnecting the socket.
@@ -299,6 +300,7 @@ static int rpcb_create_local_net(struct net *net)
.program = &rpcb_program,
.version = RPCBVERS_2,
.authflavor = RPC_AUTH_UNIX,
+ .cred = current_cred(),
.flags = RPC_CLNT_CREATE_NOPING,
};
struct rpc_clnt *clnt, *clnt4;
@@ -358,7 +360,8 @@ out:
static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
const char *hostname,
struct sockaddr *srvaddr, size_t salen,
- int proto, u32 version)
+ int proto, u32 version,
+ const struct cred *cred)
{
struct rpc_create_args args = {
.net = net,
@@ -370,6 +373,7 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
.program = &rpcb_program,
.version = version,
.authflavor = RPC_AUTH_UNIX,
+ .cred = cred,
.flags = (RPC_CLNT_CREATE_NOPING |
RPC_CLNT_CREATE_NONPRIVPORT),
};
@@ -694,7 +698,8 @@ void rpcb_getport_async(struct rpc_task *task)
/* Put self on the wait queue to ensure we get notified if
* some other task is already attempting to bind the port */
- rpc_sleep_on(&xprt->binding, task, NULL);
+ rpc_sleep_on_timeout(&xprt->binding, task,
+ NULL, jiffies + xprt->bind_timeout);
if (xprt_test_and_set_binding(xprt)) {
dprintk("RPC: %5u %s: waiting for another binder\n",
@@ -744,7 +749,8 @@ void rpcb_getport_async(struct rpc_task *task)
rpcb_clnt = rpcb_create(xprt->xprt_net,
clnt->cl_nodename,
xprt->servername, sap, salen,
- xprt->prot, bind_version);
+ xprt->prot, bind_version,
+ clnt->cl_cred);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 28956c70100a..1a12fb03e611 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -58,6 +58,20 @@ static struct rpc_wait_queue delay_queue;
struct workqueue_struct *rpciod_workqueue __read_mostly;
struct workqueue_struct *xprtiod_workqueue __read_mostly;
+unsigned long
+rpc_task_timeout(const struct rpc_task *task)
+{
+ unsigned long timeout = READ_ONCE(task->tk_timeout);
+
+ if (timeout != 0) {
+ unsigned long now = jiffies;
+ if (time_before(now, timeout))
+ return timeout - now;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(rpc_task_timeout);
+
/*
* Disable the timer for a given RPC task. Should be called with
* queue->lock and bh_disabled in order to avoid races within
@@ -66,7 +80,7 @@ struct workqueue_struct *xprtiod_workqueue __read_mostly;
static void
__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
{
- if (task->tk_timeout == 0)
+ if (list_empty(&task->u.tk_wait.timer_list))
return;
dprintk("RPC: %5u disabling timer\n", task->tk_pid);
task->tk_timeout = 0;
@@ -78,25 +92,21 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
static void
rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
{
- queue->timer_list.expires = expires;
- mod_timer(&queue->timer_list.timer, expires);
+ timer_reduce(&queue->timer_list.timer, expires);
}
/*
* Set up a timer for the current task.
*/
static void
-__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
+__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
+ unsigned long timeout)
{
- if (!task->tk_timeout)
- return;
-
dprintk("RPC: %5u setting alarm for %u ms\n",
- task->tk_pid, jiffies_to_msecs(task->tk_timeout));
+ task->tk_pid, jiffies_to_msecs(timeout - jiffies));
- task->u.tk_wait.expires = jiffies + task->tk_timeout;
- if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires))
- rpc_set_queue_timer(queue, task->u.tk_wait.expires);
+ task->tk_timeout = timeout;
+ rpc_set_queue_timer(queue, timeout);
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
@@ -188,6 +198,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
if (RPC_IS_QUEUED(task))
return;
+ INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task, queue_priority);
else if (RPC_IS_SWAPPER(task))
@@ -238,7 +249,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
queue->qlen = 0;
- timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
+ timer_setup(&queue->timer_list.timer,
+ __rpc_queue_timer_fn,
+ TIMER_DEFERRABLE);
INIT_LIST_HEAD(&queue->timer_list.list);
rpc_assign_waitqueue_name(queue, qname);
}
@@ -362,7 +375,6 @@ static void rpc_make_runnable(struct workqueue_struct *wq,
*/
static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
struct rpc_task *task,
- rpc_action action,
unsigned char queue_priority)
{
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
@@ -372,47 +384,100 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
__rpc_add_wait_queue(q, task, queue_priority);
- WARN_ON_ONCE(task->tk_callback != NULL);
- task->tk_callback = action;
- __rpc_add_timer(q, task);
}
-void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
- rpc_action action)
+static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
+ struct rpc_task *task, unsigned long timeout,
+ unsigned char queue_priority)
+{
+ if (time_is_after_jiffies(timeout)) {
+ __rpc_sleep_on_priority(q, task, queue_priority);
+ __rpc_add_timer(q, task, timeout);
+ } else
+ task->tk_status = -ETIMEDOUT;
+}
+
+static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action)
+{
+ if (action && !WARN_ON_ONCE(task->tk_callback != NULL))
+ task->tk_callback = action;
+}
+
+static bool rpc_sleep_check_activated(struct rpc_task *task)
{
/* We shouldn't ever put an inactive task to sleep */
- WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
- if (!RPC_IS_ACTIVATED(task)) {
+ if (WARN_ON_ONCE(!RPC_IS_ACTIVATED(task))) {
task->tk_status = -EIO;
rpc_put_task_async(task);
- return;
+ return false;
}
+ return true;
+}
+
+void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
+ rpc_action action, unsigned long timeout)
+{
+ if (!rpc_sleep_check_activated(task))
+ return;
+
+ rpc_set_tk_callback(task, action);
+
+ /*
+ * Protect the queue operations.
+ */
+ spin_lock_bh(&q->lock);
+ __rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority);
+ spin_unlock_bh(&q->lock);
+}
+EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
+void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
+ rpc_action action)
+{
+ if (!rpc_sleep_check_activated(task))
+ return;
+
+ rpc_set_tk_callback(task, action);
+
+ WARN_ON_ONCE(task->tk_timeout != 0);
/*
* Protect the queue operations.
*/
spin_lock_bh(&q->lock);
- __rpc_sleep_on_priority(q, task, action, task->tk_priority);
+ __rpc_sleep_on_priority(q, task, task->tk_priority);
spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on);
+void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
+ struct rpc_task *task, unsigned long timeout, int priority)
+{
+ if (!rpc_sleep_check_activated(task))
+ return;
+
+ priority -= RPC_PRIORITY_LOW;
+ /*
+ * Protect the queue operations.
+ */
+ spin_lock_bh(&q->lock);
+ __rpc_sleep_on_priority_timeout(q, task, timeout, priority);
+ spin_unlock_bh(&q->lock);
+}
+EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
+
void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
- rpc_action action, int priority)
+ int priority)
{
- /* We shouldn't ever put an inactive task to sleep */
- WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
- if (!RPC_IS_ACTIVATED(task)) {
- task->tk_status = -EIO;
- rpc_put_task_async(task);
+ if (!rpc_sleep_check_activated(task))
return;
- }
+ WARN_ON_ONCE(task->tk_timeout != 0);
+ priority -= RPC_PRIORITY_LOW;
/*
* Protect the queue operations.
*/
spin_lock_bh(&q->lock);
- __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
+ __rpc_sleep_on_priority(q, task, priority);
spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
@@ -704,7 +769,7 @@ static void __rpc_queue_timer_fn(struct timer_list *t)
spin_lock(&queue->lock);
expires = now = jiffies;
list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
- timeo = task->u.tk_wait.expires;
+ timeo = task->tk_timeout;
if (time_after_eq(now, timeo)) {
dprintk("RPC: %5u timeout\n", task->tk_pid);
task->tk_status = -ETIMEDOUT;
@@ -730,8 +795,7 @@ static void __rpc_atrun(struct rpc_task *task)
*/
void rpc_delay(struct rpc_task *task, unsigned long delay)
{
- task->tk_timeout = delay;
- rpc_sleep_on(&delay_queue, task, __rpc_atrun);
+ rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay);
}
EXPORT_SYMBOL_GPL(rpc_delay);
@@ -759,8 +823,7 @@ static void
rpc_reset_task_statistics(struct rpc_task *task)
{
task->tk_timeouts = 0;
- task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT);
-
+ task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_SENT);
rpc_init_task_statistics(task);
}
@@ -773,7 +836,6 @@ void rpc_exit_task(struct rpc_task *task)
if (task->tk_ops->rpc_call_done != NULL) {
task->tk_ops->rpc_call_done(task, task->tk_calldata);
if (task->tk_action != NULL) {
- WARN_ON(RPC_ASSASSINATED(task));
/* Always release the RPC slot and buffer memory */
xprt_release(task);
rpc_reset_task_statistics(task);
@@ -781,6 +843,19 @@ void rpc_exit_task(struct rpc_task *task)
}
}
+void rpc_signal_task(struct rpc_task *task)
+{
+ struct rpc_wait_queue *queue;
+
+ if (!RPC_IS_ACTIVATED(task))
+ return;
+ set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
+ smp_mb__after_atomic();
+ queue = READ_ONCE(task->tk_waitqueue);
+ if (queue)
+ rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
+}
+
void rpc_exit(struct rpc_task *task, int status)
{
task->tk_status = status;
@@ -836,6 +911,13 @@ static void __rpc_execute(struct rpc_task *task)
*/
if (!RPC_IS_QUEUED(task))
continue;
+
+ /*
+ * Signalled tasks should exit rather than sleep.
+ */
+ if (RPC_SIGNALLED(task))
+ rpc_exit(task, -ERESTARTSYS);
+
/*
* The queue->lock protects against races with
* rpc_make_runnable().
@@ -861,7 +943,7 @@ static void __rpc_execute(struct rpc_task *task)
status = out_of_line_wait_on_bit(&task->tk_runstate,
RPC_TASK_QUEUED, rpc_wait_bit_killable,
TASK_KILLABLE);
- if (status == -ERESTARTSYS) {
+ if (status < 0) {
/*
* When a sync task receives a signal, it exits with
* -ERESTARTSYS. In order to catch any callbacks that
@@ -869,7 +951,7 @@ static void __rpc_execute(struct rpc_task *task)
* break the loop here, but go around once more.
*/
dprintk("RPC: %5u got signal\n", task->tk_pid);
- task->tk_flags |= RPC_TASK_KILLED;
+ set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
rpc_exit(task, -ERESTARTSYS);
}
dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 7e55cfc69697..9faea12624a6 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -106,7 +106,7 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb
/* ACL likes to be lazy in allocating pages - ACLs
* are small by default but can get huge. */
if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
- *ppage = alloc_page(GFP_ATOMIC);
+ *ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
copied = -ENOMEM;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d7117d241460..a9d40bc7ebed 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -73,6 +73,15 @@ static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
static LIST_HEAD(xprt_list);
+static unsigned long xprt_request_timeout(const struct rpc_rqst *req)
+{
+ unsigned long timeout = jiffies + req->rq_timeout;
+
+ if (time_before(timeout, req->rq_majortimeo))
+ return timeout;
+ return req->rq_majortimeo;
+}
+
/**
* xprt_register_transport - register a transport implementation
* @transport: transport to register
@@ -209,9 +218,12 @@ out_unlock:
out_sleep:
dprintk("RPC: %5u failed to lock transport %p\n",
task->tk_pid, xprt);
- task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
task->tk_status = -EAGAIN;
- rpc_sleep_on(&xprt->sending, task, NULL);
+ if (RPC_IS_SOFT(task))
+ rpc_sleep_on_timeout(&xprt->sending, task, NULL,
+ xprt_request_timeout(req));
+ else
+ rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@@ -273,9 +285,12 @@ out_unlock:
xprt_clear_locked(xprt);
out_sleep:
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
- task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
task->tk_status = -EAGAIN;
- rpc_sleep_on(&xprt->sending, task, NULL);
+ if (RPC_IS_SOFT(task))
+ rpc_sleep_on_timeout(&xprt->sending, task, NULL,
+ xprt_request_timeout(req));
+ else
+ rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -554,53 +569,44 @@ bool xprt_write_space(struct rpc_xprt *xprt)
}
EXPORT_SYMBOL_GPL(xprt_write_space);
-/**
- * xprt_set_retrans_timeout_def - set a request's retransmit timeout
- * @task: task whose timeout is to be set
- *
- * Set a request's retransmit timeout based on the transport's
- * default timeout parameters. Used by transports that don't adjust
- * the retransmit timeout based on round-trip time estimation.
- */
-void xprt_set_retrans_timeout_def(struct rpc_task *task)
+static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
{
- task->tk_timeout = task->tk_rqstp->rq_timeout;
+ s64 delta = ktime_to_ns(ktime_get() - abstime);
+ return likely(delta >= 0) ?
+ jiffies - nsecs_to_jiffies(delta) :
+ jiffies + nsecs_to_jiffies(-delta);
}
-EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
-/**
- * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
- * @task: task whose timeout is to be set
- *
- * Set a request's retransmit timeout using the RTT estimator.
- */
-void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
+static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
{
- int timer = task->tk_msg.rpc_proc->p_timer;
- struct rpc_clnt *clnt = task->tk_client;
- struct rpc_rtt *rtt = clnt->cl_rtt;
- struct rpc_rqst *req = task->tk_rqstp;
- unsigned long max_timeout = clnt->cl_timeout->to_maxval;
+ const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
+ unsigned long majortimeo = req->rq_timeout;
- task->tk_timeout = rpc_calc_rto(rtt, timer);
- task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
- if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
- task->tk_timeout = max_timeout;
+ if (to->to_exponential)
+ majortimeo <<= to->to_retries;
+ else
+ majortimeo += to->to_increment * to->to_retries;
+ if (majortimeo > to->to_maxval || majortimeo == 0)
+ majortimeo = to->to_maxval;
+ return majortimeo;
}
-EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
static void xprt_reset_majortimeo(struct rpc_rqst *req)
{
- const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
+ req->rq_majortimeo += xprt_calc_majortimeo(req);
+}
- req->rq_majortimeo = req->rq_timeout;
- if (to->to_exponential)
- req->rq_majortimeo <<= to->to_retries;
+static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
+{
+ unsigned long time_init;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (likely(xprt && xprt_connected(xprt)))
+ time_init = jiffies;
else
- req->rq_majortimeo += to->to_increment * to->to_retries;
- if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0)
- req->rq_majortimeo = to->to_maxval;
- req->rq_majortimeo += jiffies;
+ time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
+ req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+ req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
}
/**
@@ -822,9 +828,9 @@ void xprt_connect(struct rpc_task *task)
xprt->ops->close(xprt);
if (!xprt_connected(xprt)) {
- task->tk_timeout = task->tk_rqstp->rq_timeout;
task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
- rpc_sleep_on(&xprt->pending, task, NULL);
+ rpc_sleep_on_timeout(&xprt->pending, task, NULL,
+ xprt_request_timeout(task->tk_rqstp));
if (test_bit(XPRT_CLOSING, &xprt->state))
return;
@@ -949,7 +955,7 @@ xprt_is_pinned_rqst(struct rpc_rqst *req)
* @req: Request to pin
*
* Caller must ensure this is atomic with the call to xprt_lookup_rqst()
- * so should be holding the xprt receive lock.
+ * so should be holding xprt->queue_lock.
*/
void xprt_pin_rqst(struct rpc_rqst *req)
{
@@ -961,7 +967,7 @@ EXPORT_SYMBOL_GPL(xprt_pin_rqst);
* xprt_unpin_rqst - Unpin a request on the transport receive list
* @req: Request to pin
*
- * Caller should be holding the xprt receive lock.
+ * Caller should be holding xprt->queue_lock.
*/
void xprt_unpin_rqst(struct rpc_rqst *req)
{
@@ -1017,7 +1023,6 @@ xprt_request_enqueue_receive(struct rpc_task *task)
set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
- xprt_reset_majortimeo(req);
/* Turn off autodisconnect */
del_singleshot_timer_sync(&xprt->timer);
}
@@ -1103,6 +1108,49 @@ static void xprt_timer(struct rpc_task *task)
}
/**
+ * xprt_wait_for_reply_request_def - wait for reply
+ * @task: pointer to rpc_task
+ *
+ * Set a request's retransmit timeout based on the transport's
+ * default timeout parameters. Used by transports that don't adjust
+ * the retransmit timeout based on round-trip time estimation,
+ * and put the task to sleep on the pending queue.
+ */
+void xprt_wait_for_reply_request_def(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+
+ rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
+ xprt_request_timeout(req));
+}
+EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def);
+
+/**
+ * xprt_wait_for_reply_request_rtt - wait for reply using RTT estimator
+ * @task: pointer to rpc_task
+ *
+ * Set a request's retransmit timeout using the RTT estimator,
+ * and put the task to sleep on the pending queue.
+ */
+void xprt_wait_for_reply_request_rtt(struct rpc_task *task)
+{
+ int timer = task->tk_msg.rpc_proc->p_timer;
+ struct rpc_clnt *clnt = task->tk_client;
+ struct rpc_rtt *rtt = clnt->cl_rtt;
+ struct rpc_rqst *req = task->tk_rqstp;
+ unsigned long max_timeout = clnt->cl_timeout->to_maxval;
+ unsigned long timeout;
+
+ timeout = rpc_calc_rto(rtt, timer);
+ timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
+ if (timeout > max_timeout || timeout == 0)
+ timeout = max_timeout;
+ rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
+ jiffies + timeout);
+}
+EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt);
+
+/**
* xprt_request_wait_receive - wait for the reply to an RPC request
* @task: RPC task about to send a request
*
@@ -1121,8 +1169,7 @@ void xprt_request_wait_receive(struct rpc_task *task)
*/
spin_lock(&xprt->queue_lock);
if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
- xprt->ops->set_retrans_timeout(task);
- rpc_sleep_on(&xprt->pending, task, xprt_timer);
+ xprt->ops->wait_for_reply_request(task);
/*
* Send an extra queue wakeup call if the
* connection was dropped in case the call to
@@ -1337,6 +1384,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
if (status < 0)
goto out_dequeue;
}
+ if (RPC_SIGNALLED(task)) {
+ status = -ERESTARTSYS;
+ goto out_dequeue;
+ }
}
/*
@@ -1605,7 +1656,6 @@ xprt_request_init(struct rpc_task *task)
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req = task->tk_rqstp;
- req->rq_timeout = task->tk_client->cl_timeout->to_initval;
req->rq_task = task;
req->rq_xprt = xprt;
req->rq_buffer = NULL;
@@ -1618,7 +1668,7 @@ xprt_request_init(struct rpc_task *task)
req->rq_snd_buf.bvec = NULL;
req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL;
- xprt_reset_majortimeo(req);
+ xprt_init_majortimeo(task, req);
dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
req, ntohl(req->rq_xid));
}
@@ -1647,7 +1697,6 @@ void xprt_reserve(struct rpc_task *task)
if (task->tk_rqstp != NULL)
return;
- task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (!xprt_throttle_congested(xprt, task))
xprt_do_reserve(xprt, task);
@@ -1670,7 +1719,6 @@ void xprt_retry_reserve(struct rpc_task *task)
if (task->tk_rqstp != NULL)
return;
- task->tk_timeout = 0;
task->tk_status = -EAGAIN;
xprt_do_reserve(xprt, task);
}
@@ -1827,7 +1875,9 @@ found:
xprt->idle_timeout = 0;
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
if (xprt_has_timer(xprt))
- timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
+ timer_setup(&xprt->timer,
+ xprt_init_autodisconnect,
+ TIMER_DEFERRABLE);
else
timer_setup(&xprt->timer, NULL, 0);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index d79b18c1f4cd..ce986591f213 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -19,45 +19,6 @@
#undef RPCRDMA_BACKCHANNEL_DEBUG
-static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
- unsigned int count)
-{
- struct rpc_xprt *xprt = &r_xprt->rx_xprt;
- struct rpcrdma_req *req;
- struct rpc_rqst *rqst;
- unsigned int i;
-
- for (i = 0; i < (count << 1); i++) {
- struct rpcrdma_regbuf *rb;
- size_t size;
-
- req = rpcrdma_create_req(r_xprt);
- if (IS_ERR(req))
- return PTR_ERR(req);
- rqst = &req->rl_slot;
-
- rqst->rq_xprt = xprt;
- INIT_LIST_HEAD(&rqst->rq_bc_list);
- __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
- spin_lock(&xprt->bc_pa_lock);
- list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
- spin_unlock(&xprt->bc_pa_lock);
-
- size = r_xprt->rx_data.inline_rsize;
- rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
- if (IS_ERR(rb))
- goto out_fail;
- req->rl_sendbuf = rb;
- xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
- min_t(size_t, size, PAGE_SIZE));
- }
- return 0;
-
-out_fail:
- rpcrdma_req_destroy(req);
- return -ENOMEM;
-}
-
/**
* xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
* @xprt: transport associated with these backchannel resources
@@ -68,34 +29,10 @@ out_fail:
int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int rc;
- /* The backchannel reply path returns each rpc_rqst to the
- * bc_pa_list _after_ the reply is sent. If the server is
- * faster than the client, it can send another backward
- * direction request before the rpc_rqst is returned to the
- * list. The client rejects the request in this case.
- *
- * Twice as many rpc_rqsts are prepared to ensure there is
- * always an rpc_rqst available as soon as a reply is sent.
- */
- if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
- goto out_err;
-
- rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
- if (rc)
- goto out_free;
-
- r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
+ r_xprt->rx_buf.rb_bc_srv_max_requests = RPCRDMA_BACKWARD_WRS >> 1;
trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0;
-
-out_free:
- xprt_rdma_bc_destroy(xprt, reqs);
-
-out_err:
- pr_err("RPC: %s: setup backchannel transport failed\n", __func__);
- return -ENOMEM;
}
/**
@@ -107,10 +44,10 @@ out_err:
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
size_t maxmsg;
- maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize);
+ maxmsg = min_t(unsigned int, ep->rep_inline_send, ep->rep_inline_recv);
maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
@@ -123,7 +60,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
- req->rl_rdmabuf->rg_base, rqst);
+ rdmab_data(req->rl_rdmabuf), rqst);
p = xdr_reserve_space(&req->rl_stream, 28);
if (unlikely(!p))
@@ -223,6 +160,43 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
spin_unlock(&xprt->bc_pa_lock);
}
+static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt)
+{
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ struct rpcrdma_req *req;
+ struct rpc_rqst *rqst;
+ size_t size;
+
+ spin_lock(&xprt->bc_pa_lock);
+ rqst = list_first_entry_or_null(&xprt->bc_pa_list, struct rpc_rqst,
+ rq_bc_pa_list);
+ if (!rqst)
+ goto create_req;
+ list_del(&rqst->rq_bc_pa_list);
+ spin_unlock(&xprt->bc_pa_lock);
+ return rqst;
+
+create_req:
+ spin_unlock(&xprt->bc_pa_lock);
+
+ /* Set a limit to prevent a remote from overrunning our resources.
+ */
+ if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS)
+ return NULL;
+
+ size = min_t(size_t, r_xprt->rx_ep.rep_inline_recv, PAGE_SIZE);
+ req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
+ if (!req)
+ return NULL;
+
+ xprt->bc_alloc_count++;
+ rqst = &req->rl_slot;
+ rqst->rq_xprt = xprt;
+ __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
+ xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(req->rl_sendbuf), size);
+ return rqst;
+}
+
/**
* rpcrdma_bc_receive_call - Handle a backward direction call
* @r_xprt: transport receiving the call
@@ -254,18 +228,10 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
pr_info("RPC: %s: %*ph\n", __func__, size, p);
#endif
- /* Grab a free bc rqst */
- spin_lock(&xprt->bc_pa_lock);
- if (list_empty(&xprt->bc_pa_list)) {
- spin_unlock(&xprt->bc_pa_lock);
+ rqst = rpcrdma_bc_rqst_get(r_xprt);
+ if (!rqst)
goto out_overflow;
- }
- rqst = list_first_entry(&xprt->bc_pa_list,
- struct rpc_rqst, rq_bc_pa_list);
- list_del(&rqst->rq_bc_pa_list);
- spin_unlock(&xprt->bc_pa_lock);
- /* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
rqst->rq_xid = *p;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 52cb6c1b0c2b..794ba4ca0994 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -82,13 +82,13 @@
/**
* frwr_is_supported - Check if device supports FRWR
- * @ia: interface adapter to check
+ * @device: interface adapter to check
*
* Returns true if device supports FRWR, otherwise false
*/
-bool frwr_is_supported(struct rpcrdma_ia *ia)
+bool frwr_is_supported(struct ib_device *device)
{
- struct ib_device_attr *attrs = &ia->ri_device->attrs;
+ struct ib_device_attr *attrs = &device->attrs;
if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
goto out_not_supported;
@@ -98,7 +98,7 @@ bool frwr_is_supported(struct rpcrdma_ia *ia)
out_not_supported:
pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
- ia->ri_device->name);
+ device->name);
return false;
}
@@ -131,7 +131,7 @@ frwr_mr_recycle_worker(struct work_struct *work)
if (mr->mr_dir != DMA_NONE) {
trace_xprtrdma_mr_unmap(mr);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
mr->mr_dir = DMA_NONE;
}
@@ -194,12 +194,11 @@ out_list_err:
* frwr_open - Prepare an endpoint for use with FRWR
* @ia: interface adapter this endpoint will use
* @ep: endpoint to prepare
- * @cdata: transport parameters
*
* On success, sets:
* ep->rep_attr.cap.max_send_wr
* ep->rep_attr.cap.max_recv_wr
- * cdata->max_requests
+ * ep->rep_max_requests
* ia->ri_max_segs
*
* And these FRWR-related fields:
@@ -208,10 +207,9 @@ out_list_err:
*
* On failure, a negative errno is returned.
*/
-int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
- struct rpcrdma_create_data_internal *cdata)
+int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
{
- struct ib_device_attr *attrs = &ia->ri_device->attrs;
+ struct ib_device_attr *attrs = &ia->ri_id->device->attrs;
int max_qp_wr, depth, delta;
ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
@@ -253,24 +251,23 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
} while (delta > 0);
}
- max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+ max_qp_wr = ia->ri_id->device->attrs.max_qp_wr;
max_qp_wr -= RPCRDMA_BACKWARD_WRS;
max_qp_wr -= 1;
if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
return -ENOMEM;
- if (cdata->max_requests > max_qp_wr)
- cdata->max_requests = max_qp_wr;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
+ if (ep->rep_max_requests > max_qp_wr)
+ ep->rep_max_requests = max_qp_wr;
+ ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
- cdata->max_requests = max_qp_wr / depth;
- if (!cdata->max_requests)
+ ep->rep_max_requests = max_qp_wr / depth;
+ if (!ep->rep_max_requests)
return -EINVAL;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests *
- depth;
+ ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
}
ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
- ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
@@ -300,15 +297,6 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
(ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
}
-static void
-__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
-{
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
- wr, ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
-}
-
/**
* frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
* @cq: completion queue (ignored)
@@ -323,10 +311,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- if (wc->status != IB_WC_SUCCESS) {
+ if (wc->status != IB_WC_SUCCESS)
frwr->fr_state = FRWR_FLUSHED_FR;
- __frwr_sendcompletion_flush(wc, "fastreg");
- }
trace_xprtrdma_wc_fastreg(wc, frwr);
}
@@ -344,10 +330,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- if (wc->status != IB_WC_SUCCESS) {
+ if (wc->status != IB_WC_SUCCESS)
frwr->fr_state = FRWR_FLUSHED_LI;
- __frwr_sendcompletion_flush(wc, "localinv");
- }
trace_xprtrdma_wc_li(wc, frwr);
}
@@ -366,12 +350,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- if (wc->status != IB_WC_SUCCESS) {
+ if (wc->status != IB_WC_SUCCESS)
frwr->fr_state = FRWR_FLUSHED_LI;
- __frwr_sendcompletion_flush(wc, "localinv");
- }
- complete(&frwr->fr_linv_done);
trace_xprtrdma_wc_li_wake(wc, frwr);
+ complete(&frwr->fr_linv_done);
}
/**
@@ -436,7 +418,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
}
mr->mr_dir = rpcrdma_data_dir(writing);
- mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
+ mr->mr_nents =
+ ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
@@ -466,7 +449,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
return seg;
out_dmamap_err:
- frwr->fr_state = FRWR_IS_INVALID;
+ mr->mr_dir = DMA_NONE;
trace_xprtrdma_frwr_sgerr(mr, i);
rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 6c1fb270f127..85115a2e2639 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -105,16 +105,23 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
return size;
}
+/**
+ * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
+ * @r_xprt: transport instance to initialize
+ *
+ * The max_inline fields contain the maximum size of an RPC message
+ * so the marshaling code doesn't have to repeat this calculation
+ * for every RPC.
+ */
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
{
- struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- unsigned int maxsegs = ia->ri_max_segs;
-
- ia->ri_max_inline_write = cdata->inline_wsize -
- rpcrdma_max_call_header_size(maxsegs);
- ia->ri_max_inline_read = cdata->inline_rsize -
- rpcrdma_max_reply_header_size(maxsegs);
+ unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs;
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+
+ ep->rep_max_inline_send =
+ ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
+ ep->rep_max_inline_recv =
+ ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}
/* The client can send a request inline as long as the RPCRDMA header
@@ -131,7 +138,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
struct xdr_buf *xdr = &rqst->rq_snd_buf;
unsigned int count, remaining, offset;
- if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
+ if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
return false;
if (xdr->page_len) {
@@ -159,9 +166,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-
- return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
+ return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
}
/* The client is required to provide a Reply chunk if the maximum
@@ -173,10 +178,9 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
const struct rpc_rqst *rqst)
{
const struct xdr_buf *buf = &rqst->rq_rcv_buf;
- const struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- return buf->head[0].iov_len + buf->tail[0].iov_len <
- ia->ri_max_inline_read;
+ return (buf->head[0].iov_len + buf->tail[0].iov_len) <
+ r_xprt->rx_ep.rep_max_inline_recv;
}
/* Split @vec on page boundaries into SGEs. FMR registers pages, not
@@ -238,7 +242,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
*/
if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
if (!*ppages)
- *ppages = alloc_page(GFP_ATOMIC);
+ *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
if (!*ppages)
return -ENOBUFS;
}
@@ -508,50 +512,45 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
}
/**
- * rpcrdma_unmap_sendctx - DMA-unmap Send buffers
+ * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
* @sc: sendctx containing SGEs to unmap
*
*/
-void
-rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
+void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
- struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
struct ib_sge *sge;
- unsigned int count;
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
*/
- sge = &sc->sc_sges[2];
- for (count = sc->sc_unmap_count; count; ++sge, --count)
- ib_dma_unmap_page(ia->ri_device,
- sge->addr, sge->length, DMA_TO_DEVICE);
+ for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
+ ++sge, --sc->sc_unmap_count)
+ ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
+ DMA_TO_DEVICE);
- if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
- smp_mb__after_atomic();
+ if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES,
+ &sc->sc_req->rl_flags))
wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
- }
}
/* Prepare an SGE for the RPC-over-RDMA transport header.
*/
-static bool
-rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
- u32 len)
+static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req, u32 len)
{
struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
struct ib_sge *sge = sc->sc_sges;
- if (!rpcrdma_dma_map_regbuf(ia, rb))
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
goto out_regbuf;
sge->addr = rdmab_addr(rb);
sge->length = len;
sge->lkey = rdmab_lkey(rb);
- ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
- sge->length, DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
+ DMA_TO_DEVICE);
sc->sc_wr.num_sge++;
return true;
@@ -563,23 +562,23 @@ out_regbuf:
/* Prepare the Send SGEs. The head and tail iovec, and each entry
* in the page list, gets its own SGE.
*/
-static bool
-rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
- struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
+static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req,
+ struct xdr_buf *xdr,
+ enum rpcrdma_chunktype rtype)
{
struct rpcrdma_sendctx *sc = req->rl_sendctx;
unsigned int sge_no, page_base, len, remaining;
struct rpcrdma_regbuf *rb = req->rl_sendbuf;
- struct ib_device *device = ia->ri_device;
struct ib_sge *sge = sc->sc_sges;
- u32 lkey = ia->ri_pd->local_dma_lkey;
struct page *page, **ppages;
/* The head iovec is straightforward, as it is already
* DMA-mapped. Sync the content that has changed.
*/
- if (!rpcrdma_dma_map_regbuf(ia, rb))
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
goto out_regbuf;
+ sc->sc_device = rdmab_device(rb);
sge_no = 1;
sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len;
@@ -626,13 +625,14 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
goto out_mapping_overflow;
len = min_t(u32, PAGE_SIZE - page_base, remaining);
- sge[sge_no].addr = ib_dma_map_page(device, *ppages,
- page_base, len,
- DMA_TO_DEVICE);
- if (ib_dma_mapping_error(device, sge[sge_no].addr))
+ sge[sge_no].addr =
+ ib_dma_map_page(rdmab_device(rb), *ppages,
+ page_base, len, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdmab_device(rb),
+ sge[sge_no].addr))
goto out_mapping_err;
sge[sge_no].length = len;
- sge[sge_no].lkey = lkey;
+ sge[sge_no].lkey = rdmab_lkey(rb);
sc->sc_unmap_count++;
ppages++;
@@ -653,13 +653,13 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
map_tail:
sge_no++;
- sge[sge_no].addr = ib_dma_map_page(device, page,
- page_base, len,
- DMA_TO_DEVICE);
- if (ib_dma_mapping_error(device, sge[sge_no].addr))
+ sge[sge_no].addr =
+ ib_dma_map_page(rdmab_device(rb), page, page_base, len,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr))
goto out_mapping_err;
sge[sge_no].length = len;
- sge[sge_no].lkey = lkey;
+ sge[sge_no].lkey = rdmab_lkey(rb);
sc->sc_unmap_count++;
}
@@ -674,12 +674,12 @@ out_regbuf:
return false;
out_mapping_overflow:
- rpcrdma_unmap_sendctx(sc);
+ rpcrdma_sendctx_unmap(sc);
pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
return false;
out_mapping_err:
- rpcrdma_unmap_sendctx(sc);
+ rpcrdma_sendctx_unmap(sc);
trace_xprtrdma_dma_maperr(sge[sge_no].addr);
return false;
}
@@ -699,7 +699,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 hdrlen,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
- req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
+ req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
if (!req->rl_sendctx)
return -EAGAIN;
req->rl_sendctx->sc_wr.num_sge = 0;
@@ -707,11 +707,11 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
req->rl_sendctx->sc_req = req;
__clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
- if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
+ if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
return -EIO;
if (rtype != rpcrdma_areadch)
- if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
+ if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
return -EIO;
return 0;
@@ -747,8 +747,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
int ret;
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
- xdr_init_encode(xdr, &req->rl_hdrbuf,
- req->rl_rdmabuf->rg_base, rqst);
+ xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
+ rqst);
/* Fixed header fields */
ret = -EMSGSIZE;
@@ -876,6 +876,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
return 0;
out_err:
+ trace_xprtrdma_marshal_failed(rqst, ret);
switch (ret) {
case -EAGAIN:
xprt_wait_for_buffer_space(rqst->rq_xprt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 907464c2a9f0..bed57d8b5c19 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -261,7 +261,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
.buf_alloc = xprt_rdma_bc_allocate,
.buf_free = xprt_rdma_bc_free,
.send_request = xprt_rdma_bc_send_request,
- .set_retrans_timeout = xprt_set_retrans_timeout_def,
+ .wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xprt_rdma_bc_close,
.destroy = xprt_rdma_bc_put,
.print_stats = xprt_rdma_print_stats
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 5d261353bd90..1f73a6a7e43c 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -68,9 +68,9 @@
* tunables
*/
-static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
+unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
-static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
int xprt_rdma_pad_optimize;
@@ -288,7 +288,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
- rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ rpcrdma_ep_destroy(r_xprt);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
rpcrdma_ia_close(&r_xprt->rx_ia);
@@ -311,10 +311,8 @@ static const struct rpc_timeout xprt_rdma_default_timeout = {
static struct rpc_xprt *
xprt_setup_rdma(struct xprt_create *args)
{
- struct rpcrdma_create_data_internal cdata;
struct rpc_xprt *xprt;
struct rpcrdma_xprt *new_xprt;
- struct rpcrdma_ep *new_ep;
struct sockaddr *sap;
int rc;
@@ -349,40 +347,12 @@ xprt_setup_rdma(struct xprt_create *args)
xprt_set_bound(xprt);
xprt_rdma_format_addresses(xprt, sap);
- cdata.max_requests = xprt_rdma_slot_table_entries;
-
- cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
- cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
-
- cdata.inline_wsize = xprt_rdma_max_inline_write;
- if (cdata.inline_wsize > cdata.wsize)
- cdata.inline_wsize = cdata.wsize;
-
- cdata.inline_rsize = xprt_rdma_max_inline_read;
- if (cdata.inline_rsize > cdata.rsize)
- cdata.inline_rsize = cdata.rsize;
-
- /*
- * Create new transport instance, which includes initialized
- * o ia
- * o endpoint
- * o buffers
- */
-
new_xprt = rpcx_to_rdmax(xprt);
-
rc = rpcrdma_ia_open(new_xprt);
if (rc)
goto out1;
- /*
- * initialize and create ep
- */
- new_xprt->rx_data = cdata;
- new_ep = &new_xprt->rx_ep;
-
- rc = rpcrdma_ep_create(&new_xprt->rx_ep,
- &new_xprt->rx_ia, &new_xprt->rx_data);
+ rc = rpcrdma_ep_create(new_xprt);
if (rc)
goto out2;
@@ -413,7 +383,7 @@ out4:
rpcrdma_buffer_destroy(&new_xprt->rx_buf);
rc = -ENODEV;
out3:
- rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+ rpcrdma_ep_destroy(new_xprt);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
@@ -585,52 +555,15 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
rpc_wake_up_next(&xprt->backlog);
}
-static bool
-rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- size_t size, gfp_t flags)
+static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_regbuf *rb, size_t size,
+ gfp_t flags)
{
- struct rpcrdma_regbuf *rb;
-
- if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size)
- return true;
-
- rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
- if (IS_ERR(rb))
- return false;
-
- rpcrdma_free_regbuf(req->rl_sendbuf);
- r_xprt->rx_stats.hardway_register_count += size;
- req->rl_sendbuf = rb;
- return true;
-}
-
-/* The rq_rcv_buf is used only if a Reply chunk is necessary.
- * The decision to use a Reply chunk is made later in
- * rpcrdma_marshal_req. This buffer is registered at that time.
- *
- * Otherwise, the associated RPC Reply arrives in a separate
- * Receive buffer, arbitrarily chosen by the HCA. The buffer
- * allocated here for the RPC Reply is not utilized in that
- * case. See rpcrdma_inline_fixup.
- *
- * A regbuf is used here to remember the buffer size.
- */
-static bool
-rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- size_t size, gfp_t flags)
-{
- struct rpcrdma_regbuf *rb;
-
- if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
- return true;
-
- rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
- if (IS_ERR(rb))
- return false;
-
- rpcrdma_free_regbuf(req->rl_recvbuf);
- r_xprt->rx_stats.hardway_register_count += size;
- req->rl_recvbuf = rb;
+ if (unlikely(rdmab_length(rb) < size)) {
+ if (!rpcrdma_regbuf_realloc(rb, size, flags))
+ return false;
+ r_xprt->rx_stats.hardway_register_count += size;
+ }
return true;
}
@@ -655,13 +588,15 @@ xprt_rdma_allocate(struct rpc_task *task)
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
- if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
+ if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
+ flags))
goto out_fail;
- if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
+ if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize,
+ flags))
goto out_fail;
- rqst->rq_buffer = req->rl_sendbuf->rg_base;
- rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
+ rqst->rq_buffer = rdmab_data(req->rl_sendbuf);
+ rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf);
trace_xprtrdma_op_allocate(task, req);
return 0;
@@ -815,7 +750,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
.alloc_slot = xprt_rdma_alloc_slot,
.free_slot = xprt_rdma_free_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
- .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
+ .wait_for_reply_request = xprt_wait_for_reply_request_def, /* ditto */
.timer = xprt_rdma_timer,
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
.set_port = xprt_rdma_set_port,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 30cfc0efe699..bef5eac8ab38 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -76,11 +76,16 @@
static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
-static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
-static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
+static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
+ gfp_t flags);
+static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
+static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
-/* Wait for outstanding transport work to finish.
+/* Wait for outstanding transport work to finish. ib_drain_qp
+ * handles the drains in the wrong order for us, so open code
+ * them here.
*/
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
@@ -132,11 +137,6 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_send(sc, wc);
- if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
- pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
-
rpcrdma_sendctx_put_locked(sc);
}
@@ -174,10 +174,6 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
return;
out_flushed:
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
rpcrdma_recv_buffer_put(rep);
}
@@ -185,7 +181,6 @@ static void
rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
struct rdma_conn_param *param)
{
- struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
const struct rpcrdma_connect_private *pmsg = param->private_data;
unsigned int rsize, wsize;
@@ -202,12 +197,13 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
}
- if (rsize < cdata->inline_rsize)
- cdata->inline_rsize = rsize;
- if (wsize < cdata->inline_wsize)
- cdata->inline_wsize = wsize;
- dprintk("RPC: %s: max send %u, max recv %u\n",
- __func__, cdata->inline_wsize, cdata->inline_rsize);
+ if (rsize < r_xprt->rx_ep.rep_inline_recv)
+ r_xprt->rx_ep.rep_inline_recv = rsize;
+ if (wsize < r_xprt->rx_ep.rep_inline_send)
+ r_xprt->rx_ep.rep_inline_send = wsize;
+ dprintk("RPC: %s: max send %u, max recv %u\n", __func__,
+ r_xprt->rx_ep.rep_inline_send,
+ r_xprt->rx_ep.rep_inline_recv);
rpcrdma_set_max_header_sizes(r_xprt);
}
@@ -247,7 +243,7 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
pr_info("rpcrdma: removing device %s for %s:%s\n",
- ia->ri_device->name,
+ ia->ri_id->device->name,
rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
@@ -256,7 +252,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
wait_for_completion(&ia->ri_remove_done);
ia->ri_id = NULL;
- ia->ri_device = NULL;
/* Return 1 to ensure the core destroys the id. */
return 1;
case RDMA_CM_EVENT_ESTABLISHED:
@@ -291,7 +286,7 @@ disconnected:
dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__,
rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
- ia->ri_device->name, rdma_event_msg(event->event));
+ ia->ri_id->device->name, rdma_event_msg(event->event));
return 0;
}
@@ -370,9 +365,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
rc = PTR_ERR(ia->ri_id);
goto out_err;
}
- ia->ri_device = ia->ri_id->device;
- ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
+ ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0);
if (IS_ERR(ia->ri_pd)) {
rc = PTR_ERR(ia->ri_pd);
pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
@@ -381,12 +375,12 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
switch (xprt_rdma_memreg_strategy) {
case RPCRDMA_FRWR:
- if (frwr_is_supported(ia))
+ if (frwr_is_supported(ia->ri_id->device))
break;
/*FALLTHROUGH*/
default:
pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
- ia->ri_device->name, xprt_rdma_memreg_strategy);
+ ia->ri_id->device->name, xprt_rdma_memreg_strategy);
rc = -EINVAL;
goto out_err;
}
@@ -438,11 +432,11 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
* mappings and MRs are gone.
*/
list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
- rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
+ rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
- rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
- rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
- rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
+ rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf);
+ rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
+ rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
}
rpcrdma_mrs_destroy(buf);
ib_dealloc_pd(ia->ri_pd);
@@ -468,7 +462,6 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
rdma_destroy_id(ia->ri_id);
}
ia->ri_id = NULL;
- ia->ri_device = NULL;
/* If the pd is still busy, xprtrdma missed freeing a resource */
if (ia->ri_pd && !IS_ERR(ia->ri_pd))
@@ -476,19 +469,26 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
ia->ri_pd = NULL;
}
-/*
- * Create unconnected endpoint.
+/**
+ * rpcrdma_ep_create - Create unconnected endpoint
+ * @r_xprt: transport to instantiate
+ *
+ * Returns zero on success, or a negative errno.
*/
-int
-rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
- struct rpcrdma_create_data_internal *cdata)
+int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
{
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
struct ib_cq *sendcq, *recvcq;
unsigned int max_sge;
int rc;
- max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
+ ep->rep_max_requests = xprt_rdma_slot_table_entries;
+ ep->rep_inline_send = xprt_rdma_max_inline_write;
+ ep->rep_inline_recv = xprt_rdma_max_inline_read;
+
+ max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge,
RPCRDMA_MAX_SEND_SGES);
if (max_sge < RPCRDMA_MIN_SEND_SGES) {
pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
@@ -496,7 +496,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
}
ia->ri_max_send_sges = max_sge;
- rc = frwr_open(ia, ep, cdata);
+ rc = frwr_open(ia, ep);
if (rc)
return rc;
@@ -518,23 +518,21 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_send_sge,
ep->rep_attr.cap.max_recv_sge);
- /* set trigger for requesting send completion */
- ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
- cdata->max_requests >> 2);
+ ep->rep_send_batch = ep->rep_max_requests >> 3;
ep->rep_send_count = ep->rep_send_batch;
init_waitqueue_head(&ep->rep_connect_wait);
ep->rep_receive_count = 0;
- sendcq = ib_alloc_cq(ia->ri_device, NULL,
+ sendcq = ib_alloc_cq(ia->ri_id->device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
- ia->ri_device->num_comp_vectors > 1 ? 1 : 0,
+ ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0,
IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
goto out1;
}
- recvcq = ib_alloc_cq(ia->ri_device, NULL,
+ recvcq = ib_alloc_cq(ia->ri_id->device, NULL,
ep->rep_attr.cap.max_recv_wr + 1,
0, IB_POLL_WORKQUEUE);
if (IS_ERR(recvcq)) {
@@ -552,15 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
pmsg->cp_magic = rpcrdma_cmp_magic;
pmsg->cp_version = RPCRDMA_CMP_VERSION;
pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
- pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
- pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
+ pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->rep_inline_send);
+ pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->rep_inline_recv);
ep->rep_remote_cma.private_data = pmsg;
ep->rep_remote_cma.private_data_len = sizeof(*pmsg);
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
ep->rep_remote_cma.responder_resources =
- min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);
+ min_t(int, U8_MAX, ia->ri_id->device->attrs.max_qp_rd_atom);
/* Limit transport retries so client can detect server
* GID changes quickly. RPC layer handles re-establishing
@@ -583,16 +581,16 @@ out1:
return rc;
}
-/*
- * rpcrdma_ep_destroy
+/**
+ * rpcrdma_ep_destroy - Disconnect and destroy endpoint.
+ * @r_xprt: transport instance to shut down
*
- * Disconnect and destroy endpoint. After this, the only
- * valid operations on the ep are to free it (if dynamically
- * allocated) or re-create it.
*/
-void
-rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
{
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
if (ia->ri_id && ia->ri_id->qp) {
rpcrdma_ep_disconnect(ep, ia);
rdma_destroy_qp(ia->ri_id);
@@ -622,7 +620,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
goto out1;
rc = -ENOMEM;
- err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
+ err = rpcrdma_ep_create(r_xprt);
if (err) {
pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
goto out2;
@@ -639,7 +637,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
return 0;
out3:
- rpcrdma_ep_destroy(ep, ia);
+ rpcrdma_ep_destroy(r_xprt);
out2:
rpcrdma_ia_close(ia);
out1:
@@ -672,7 +670,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
*/
old = id;
rc = -ENETUNREACH;
- if (ia->ri_device != id->device) {
+ if (ia->ri_id->device != id->device) {
pr_err("rpcrdma: can't reconnect on different device!\n");
goto out_destroy;
}
@@ -796,8 +794,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
*/
/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
- * queue activity, and ib_drain_qp has flushed all remaining Send
- * requests.
+ * queue activity, and rpcrdma_xprt_drain has flushed all remaining
+ * Send requests.
*/
static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
{
@@ -867,20 +865,20 @@ static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
/**
* rpcrdma_sendctx_get_locked - Acquire a send context
- * @buf: transport buffers from which to acquire an unused context
+ * @r_xprt: controlling transport instance
*
* Returns pointer to a free send completion context; or NULL if
* the queue is empty.
*
* Usage: Called to acquire an SGE array before preparing a Send WR.
*
- * The caller serializes calls to this function (per rpcrdma_buffer),
- * and provides an effective memory barrier that flushes the new value
+ * The caller serializes calls to this function (per transport), and
+ * provides an effective memory barrier that flushes the new value
* of rb_sc_head.
*/
-struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt)
{
- struct rpcrdma_xprt *r_xprt;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_sendctx *sc;
unsigned long next_head;
@@ -905,7 +903,6 @@ out_emptyq:
* backing up. Cause the caller to pause and try again.
*/
set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
- r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
r_xprt->rx_stats.empty_sendctx_q++;
return NULL;
}
@@ -917,7 +914,7 @@ out_emptyq:
* Usage: Called from Send completion to return a sendctxt
* to the queue.
*
- * The caller serializes calls to this function (per rpcrdma_buffer).
+ * The caller serializes calls to this function (per transport).
*/
static void
rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
@@ -925,7 +922,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
unsigned long next_tail;
- /* Unmap SGEs of previously completed by unsignaled
+ /* Unmap SGEs of previously completed but unsignaled
* Sends by walking up the queue until @sc is found.
*/
next_tail = buf->rb_sc_tail;
@@ -933,7 +930,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
next_tail = rpcrdma_sendctx_next(buf, next_tail);
/* ORDER: item must be accessed _before_ tail is updated */
- rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
+ rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]);
} while (buf->rb_sc_ctxs[next_tail] != sc);
@@ -996,54 +993,70 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
rpcrdma_mrs_create(r_xprt);
}
-struct rpcrdma_req *
-rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_req_create - Allocate an rpcrdma_req object
+ * @r_xprt: controlling r_xprt
+ * @size: initial size, in bytes, of send and receive buffers
+ * @flags: GFP flags passed to memory allocators
+ *
+ * Returns an allocated and fully initialized rpcrdma_req or NULL.
+ */
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
+ gfp_t flags)
{
struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
struct rpcrdma_regbuf *rb;
struct rpcrdma_req *req;
- req = kzalloc(sizeof(*req), GFP_KERNEL);
+ req = kzalloc(sizeof(*req), flags);
if (req == NULL)
- return ERR_PTR(-ENOMEM);
+ goto out1;
- rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
- DMA_TO_DEVICE, GFP_KERNEL);
- if (IS_ERR(rb)) {
- kfree(req);
- return ERR_PTR(-ENOMEM);
- }
+ rb = rpcrdma_regbuf_alloc(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags);
+ if (!rb)
+ goto out2;
req->rl_rdmabuf = rb;
- xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
+ xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb));
+
+ req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags);
+ if (!req->rl_sendbuf)
+ goto out3;
+
+ req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags);
+ if (!req->rl_recvbuf)
+ goto out4;
+
req->rl_buffer = buffer;
INIT_LIST_HEAD(&req->rl_registered);
-
spin_lock(&buffer->rb_lock);
list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_lock);
return req;
+
+out4:
+ kfree(req->rl_sendbuf);
+out3:
+ kfree(req->rl_rdmabuf);
+out2:
+ kfree(req);
+out1:
+ return NULL;
}
-static int
-rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
+static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp)
{
- struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
- int rc;
- rc = -ENOMEM;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
if (rep == NULL)
goto out;
- rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
+ rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep.rep_inline_recv,
DMA_FROM_DEVICE, GFP_KERNEL);
- if (IS_ERR(rep->rr_rdmabuf)) {
- rc = PTR_ERR(rep->rr_rdmabuf);
+ if (!rep->rr_rdmabuf)
goto out_free;
- }
- xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
+ xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
rdmab_length(rep->rr_rdmabuf));
rep->rr_cqe.done = rpcrdma_wc_receive;
@@ -1058,22 +1071,27 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
spin_lock(&buf->rb_lock);
list_add(&rep->rr_list, &buf->rb_recv_bufs);
spin_unlock(&buf->rb_lock);
- return 0;
+ return true;
out_free:
kfree(rep);
out:
- return rc;
+ return false;
}
-int
-rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_buffer_create - Create initial set of req/rep objects
+ * @r_xprt: transport instance to (re)initialize
+ *
+ * Returns zero on success, otherwise a negative errno.
+ */
+int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
int i, rc;
buf->rb_flags = 0;
- buf->rb_max_requests = r_xprt->rx_data.max_requests;
+ buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
buf->rb_bc_srv_max_requests = 0;
spin_lock_init(&buf->rb_mrlock);
spin_lock_init(&buf->rb_lock);
@@ -1086,16 +1104,15 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
INIT_LIST_HEAD(&buf->rb_send_bufs);
INIT_LIST_HEAD(&buf->rb_allreqs);
+
+ rc = -ENOMEM;
for (i = 0; i < buf->rb_max_requests; i++) {
struct rpcrdma_req *req;
- req = rpcrdma_create_req(r_xprt);
- if (IS_ERR(req)) {
- dprintk("RPC: %s: request buffer %d alloc"
- " failed\n", __func__, i);
- rc = PTR_ERR(req);
+ req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE,
+ GFP_KERNEL);
+ if (!req)
goto out;
- }
list_add(&req->rl_list, &buf->rb_send_bufs);
}
@@ -1121,10 +1138,9 @@ out:
return rc;
}
-static void
-rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
{
- rpcrdma_free_regbuf(rep->rr_rdmabuf);
+ rpcrdma_regbuf_free(rep->rr_rdmabuf);
kfree(rep);
}
@@ -1140,9 +1156,9 @@ rpcrdma_req_destroy(struct rpcrdma_req *req)
{
list_del(&req->rl_all);
- rpcrdma_free_regbuf(req->rl_recvbuf);
- rpcrdma_free_regbuf(req->rl_sendbuf);
- rpcrdma_free_regbuf(req->rl_rdmabuf);
+ rpcrdma_regbuf_free(req->rl_recvbuf);
+ rpcrdma_regbuf_free(req->rl_sendbuf);
+ rpcrdma_regbuf_free(req->rl_rdmabuf);
kfree(req);
}
@@ -1180,7 +1196,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
* rpcrdma_buffer_destroy - Release all hw resources
* @buf: root control block for resources
*
- * ORDERING: relies on a prior ib_drain_qp :
+ * ORDERING: relies on a prior rpcrdma_xprt_drain :
* - No more Send or Receive completions can occur
* - All MRs, reps, and reqs are returned to their free lists
*/
@@ -1202,7 +1218,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
rep = list_first_entry(&buf->rb_recv_bufs,
struct rpcrdma_rep, rr_list);
list_del(&rep->rr_list);
- rpcrdma_destroy_rep(rep);
+ rpcrdma_rep_destroy(rep);
}
while (!list_empty(&buf->rb_send_bufs)) {
@@ -1281,7 +1297,7 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
if (mr->mr_dir != DMA_NONE) {
trace_xprtrdma_mr_unmap(mr);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
mr->mr_dir = DMA_NONE;
}
@@ -1331,7 +1347,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
}
spin_unlock(&buffers->rb_lock);
if (rep)
- rpcrdma_destroy_rep(rep);
+ rpcrdma_rep_destroy(rep);
}
/*
@@ -1348,69 +1364,90 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
list_add(&rep->rr_list, &buffers->rb_recv_bufs);
spin_unlock(&buffers->rb_lock);
} else {
- rpcrdma_destroy_rep(rep);
+ rpcrdma_rep_destroy(rep);
}
}
-/**
- * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
- * @size: size of buffer to be allocated, in bytes
- * @direction: direction of data movement
- * @flags: GFP flags
- *
- * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
- * can be persistently DMA-mapped for I/O.
+/* Returns a pointer to a rpcrdma_regbuf object, or NULL.
*
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
* receiving the payload of RDMA RECV operations. During Long Calls
* or Replies they may be registered externally via frwr_map.
*/
-struct rpcrdma_regbuf *
-rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
+static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
gfp_t flags)
{
struct rpcrdma_regbuf *rb;
- rb = kmalloc(sizeof(*rb) + size, flags);
- if (rb == NULL)
- return ERR_PTR(-ENOMEM);
+ rb = kmalloc(sizeof(*rb), flags);
+ if (!rb)
+ return NULL;
+ rb->rg_data = kmalloc(size, flags);
+ if (!rb->rg_data) {
+ kfree(rb);
+ return NULL;
+ }
rb->rg_device = NULL;
rb->rg_direction = direction;
rb->rg_iov.length = size;
-
return rb;
}
/**
- * __rpcrdma_map_regbuf - DMA-map a regbuf
- * @ia: controlling rpcrdma_ia
+ * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
+ * @rb: regbuf to reallocate
+ * @size: size of buffer to be allocated, in bytes
+ * @flags: GFP flags
+ *
+ * Returns true if reallocation was successful. If false is
+ * returned, @rb is left untouched.
+ */
+bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags)
+{
+ void *buf;
+
+ buf = kmalloc(size, flags);
+ if (!buf)
+ return false;
+
+ rpcrdma_regbuf_dma_unmap(rb);
+ kfree(rb->rg_data);
+
+ rb->rg_data = buf;
+ rb->rg_iov.length = size;
+ return true;
+}
+
+/**
+ * __rpcrdma_regbuf_dma_map - DMA-map a regbuf
+ * @r_xprt: controlling transport instance
* @rb: regbuf to be mapped
+ *
+ * Returns true if the buffer is now DMA mapped to @r_xprt's device
*/
-bool
-__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_regbuf *rb)
{
- struct ib_device *device = ia->ri_device;
+ struct ib_device *device = r_xprt->rx_ia.ri_id->device;
if (rb->rg_direction == DMA_NONE)
return false;
- rb->rg_iov.addr = ib_dma_map_single(device,
- (void *)rb->rg_base,
- rdmab_length(rb),
- rb->rg_direction);
+ rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb),
+ rdmab_length(rb), rb->rg_direction);
if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
trace_xprtrdma_dma_maperr(rdmab_addr(rb));
return false;
}
rb->rg_device = device;
- rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
+ rb->rg_iov.lkey = r_xprt->rx_ia.ri_pd->local_dma_lkey;
return true;
}
-static void
-rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
+static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb)
{
if (!rb)
return;
@@ -1418,19 +1455,16 @@ rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
if (!rpcrdma_regbuf_is_mapped(rb))
return;
- ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
- rdmab_length(rb), rb->rg_direction);
+ ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb),
+ rb->rg_direction);
rb->rg_device = NULL;
}
-/**
- * rpcrdma_free_regbuf - deregister and free registered buffer
- * @rb: regbuf to be deregistered and freed
- */
-void
-rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
+static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
{
- rpcrdma_dma_unmap_regbuf(rb);
+ rpcrdma_regbuf_dma_unmap(rb);
+ if (rb)
+ kfree(rb->rg_data);
kfree(rb);
}
@@ -1497,17 +1531,15 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
list_del(&rep->rr_list);
spin_unlock(&buf->rb_lock);
if (!rep) {
- if (rpcrdma_create_rep(r_xprt, temp))
+ if (!rpcrdma_rep_create(r_xprt, temp))
break;
continue;
}
rb = rep->rr_rdmabuf;
- if (!rpcrdma_regbuf_is_mapped(rb)) {
- if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
- rpcrdma_recv_buffer_put(rep);
- break;
- }
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) {
+ rpcrdma_recv_buffer_put(rep);
+ break;
}
trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 10f6593e1a6a..d1e0749bcbc4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,20 +66,17 @@
* Interface Adapter -- one per transport instance
*/
struct rpcrdma_ia {
- struct ib_device *ri_device;
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
- struct completion ri_done;
- struct completion ri_remove_done;
int ri_async_rc;
unsigned int ri_max_segs;
unsigned int ri_max_frwr_depth;
- unsigned int ri_max_inline_write;
- unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
+ struct completion ri_done;
+ struct completion ri_remove_done;
};
enum {
@@ -93,22 +90,29 @@ enum {
struct rpcrdma_ep {
unsigned int rep_send_count;
unsigned int rep_send_batch;
+ unsigned int rep_max_inline_send;
+ unsigned int rep_max_inline_recv;
int rep_connected;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
+ unsigned int rep_max_requests; /* set by /proc */
+ unsigned int rep_inline_send; /* negotiated */
+ unsigned int rep_inline_recv; /* negotiated */
int rep_receive_count;
};
/* Pre-allocate extra Work Requests for handling backward receives
* and sends. This is a fixed value because the Work Queues are
- * allocated when the forward channel is set up.
+ * allocated when the forward channel is set up, long before the
+ * backchannel is provisioned. This value is two times
+ * NFS4_DEF_CB_SLOT_TABLE_SIZE.
*/
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-#define RPCRDMA_BACKWARD_WRS (8)
+#define RPCRDMA_BACKWARD_WRS (32)
#else
-#define RPCRDMA_BACKWARD_WRS (0)
+#define RPCRDMA_BACKWARD_WRS (0)
#endif
/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
@@ -121,33 +125,34 @@ struct rpcrdma_regbuf {
struct ib_sge rg_iov;
struct ib_device *rg_device;
enum dma_data_direction rg_direction;
- __be32 rg_base[0] __attribute__ ((aligned(256)));
+ void *rg_data;
};
-static inline u64
-rdmab_addr(struct rpcrdma_regbuf *rb)
+static inline u64 rdmab_addr(struct rpcrdma_regbuf *rb)
{
return rb->rg_iov.addr;
}
-static inline u32
-rdmab_length(struct rpcrdma_regbuf *rb)
+static inline u32 rdmab_length(struct rpcrdma_regbuf *rb)
{
return rb->rg_iov.length;
}
-static inline u32
-rdmab_lkey(struct rpcrdma_regbuf *rb)
+static inline u32 rdmab_lkey(struct rpcrdma_regbuf *rb)
{
return rb->rg_iov.lkey;
}
-static inline struct ib_device *
-rdmab_device(struct rpcrdma_regbuf *rb)
+static inline struct ib_device *rdmab_device(struct rpcrdma_regbuf *rb)
{
return rb->rg_device;
}
+static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
+{
+ return rb->rg_data;
+}
+
#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
/* To ensure a transport can always make forward progress,
@@ -222,34 +227,18 @@ struct rpcrdma_xprt;
struct rpcrdma_sendctx {
struct ib_send_wr sc_wr;
struct ib_cqe sc_cqe;
+ struct ib_device *sc_device;
struct rpcrdma_xprt *sc_xprt;
struct rpcrdma_req *sc_req;
unsigned int sc_unmap_count;
struct ib_sge sc_sges[];
};
-/* Limit the number of SGEs that can be unmapped during one
- * Send completion. This caps the amount of work a single
- * completion can do before returning to the provider.
- *
- * Setting this to zero disables Send completion batching.
- */
-enum {
- RPCRDMA_MAX_SEND_BATCH = 7,
-};
-
/*
* struct rpcrdma_mr - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
- *
- * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
- * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
- * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
- * track of registration metadata while each RPC is pending.
- * rpcrdma_deregister_external() uses this metadata to unmap and
- * release these resources when an RPC is complete.
*/
enum rpcrdma_frwr_state {
FRWR_IS_INVALID, /* ready to be used */
@@ -419,20 +408,6 @@ enum {
};
/*
- * Internal structure for transport instance creation. This
- * exists primarily for modularity.
- *
- * This data should be set with mount options
- */
-struct rpcrdma_create_data_internal {
- unsigned int max_requests; /* max requests (slots) in flight */
- unsigned int rsize; /* mount rsize - max read hdr+data */
- unsigned int wsize; /* mount wsize - max write hdr+data */
- unsigned int inline_rsize; /* max non-rdma read data payload */
- unsigned int inline_wsize; /* max non-rdma write data payload */
-};
-
-/*
* Statistics for RPCRDMA
*/
struct rpcrdma_stats {
@@ -476,13 +451,11 @@ struct rpcrdma_xprt {
struct rpcrdma_ia rx_ia;
struct rpcrdma_ep rx_ep;
struct rpcrdma_buffer rx_buf;
- struct rpcrdma_create_data_internal rx_data;
struct delayed_work rx_connect_worker;
struct rpcrdma_stats rx_stats;
};
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
-#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
static inline const char *
rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
@@ -516,9 +489,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
/*
* Endpoint calls - xprtrdma/verbs.c
*/
-int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
- struct rpcrdma_create_data_internal *);
-void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt);
+void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
@@ -528,11 +500,12 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
/*
* Buffer calls - xprtrdma/verbs.c
*/
-struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
+ gfp_t flags);
void rpcrdma_req_destroy(struct rpcrdma_req *req);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
-struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
@@ -548,23 +521,34 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
-struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
- gfp_t);
-bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
-void rpcrdma_free_regbuf(struct rpcrdma_regbuf *);
+bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
+ gfp_t flags);
+bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_regbuf *rb);
-static inline bool
-rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
+/**
+ * rpcrdma_regbuf_is_mapped - check if buffer is DMA mapped
+ *
+ * Returns true if the buffer is now mapped to rb->rg_device.
+ */
+static inline bool rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
{
return rb->rg_device != NULL;
}
-static inline bool
-rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+/**
+ * rpcrdma_regbuf_dma_map - DMA-map a regbuf
+ * @r_xprt: controlling transport instance
+ * @rb: regbuf to be mapped
+ *
+ * Returns true if the buffer is currently DMA mapped.
+ */
+static inline bool rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_regbuf *rb)
{
if (likely(rpcrdma_regbuf_is_mapped(rb)))
return true;
- return __rpcrdma_dma_map_regbuf(ia, rb);
+ return __rpcrdma_regbuf_dma_map(r_xprt, rb);
}
/*
@@ -579,9 +563,8 @@ rpcrdma_data_dir(bool writing)
/* Memory registration calls xprtrdma/frwr_ops.c
*/
-bool frwr_is_supported(struct rpcrdma_ia *);
-int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
- struct rpcrdma_create_data_internal *cdata);
+bool frwr_is_supported(struct ib_device *device);
+int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
void frwr_release_mr(struct rpcrdma_mr *mr);
size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
@@ -610,7 +593,7 @@ int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 hdrlen,
struct xdr_buf *xdr,
enum rpcrdma_chunktype rtype);
-void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
+void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
@@ -627,7 +610,9 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
/* RPC/RDMA module init - xprtrdma/transport.c
*/
+extern unsigned int xprt_rdma_slot_table_entries;
extern unsigned int xprt_rdma_max_inline_read;
+extern unsigned int xprt_rdma_max_inline_write;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
void xprt_rdma_close(struct rpc_xprt *xprt);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 732d4b57411a..c69951ed2ebc 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2017,6 +2017,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
* we'll need to figure out how to pass a namespace to
* connect.
*/
+ task->tk_rpc_status = -ENOTCONN;
rpc_exit(task, -ENOTCONN);
return;
}
@@ -2690,7 +2691,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
.send_request = xs_local_send_request,
- .set_retrans_timeout = xprt_set_retrans_timeout_def,
+ .wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xs_close,
.destroy = xs_destroy,
.print_stats = xs_local_print_stats,
@@ -2710,7 +2711,7 @@ static const struct rpc_xprt_ops xs_udp_ops = {
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
.send_request = xs_udp_send_request,
- .set_retrans_timeout = xprt_set_retrans_timeout_rtt,
+ .wait_for_reply_request = xprt_wait_for_reply_request_rtt,
.timer = xs_udp_timer,
.release_request = xprt_release_rqst_cong,
.close = xs_close,
@@ -2733,7 +2734,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
.send_request = xs_tcp_send_request,
- .set_retrans_timeout = xprt_set_retrans_timeout_def,
+ .wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xs_tcp_shutdown,
.destroy = xs_destroy,
.set_connect_timeout = xs_tcp_set_connect_timeout,
@@ -2761,7 +2762,7 @@ static const struct rpc_xprt_ops bc_tcp_ops = {
.buf_alloc = bc_malloc,
.buf_free = bc_free,
.send_request = bc_send_request,
- .set_retrans_timeout = xprt_set_retrans_timeout_def,
+ .wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = bc_close,
.destroy = bc_destroy,
.print_stats = xs_tcp_print_stats,