diff options
author | Chuck Lever <chuck.lever@oracle.com> | 2016-06-29 13:53:43 -0400 |
---|---|---|
committer | Anna Schumaker <Anna.Schumaker@Netapp.com> | 2016-07-11 15:50:43 -0400 |
commit | 7a89f9c626e337ba6528d8a2829b228c933877fb (patch) | |
tree | fb37fd67ab882087a162d6ae0d1966909f7ee4e6 /net/sunrpc/xprtrdma/frwr_ops.c | |
parent | 3d4cf35bd4fab56c3aa0ec4323fccb24970aaf79 (diff) | |
download | lwn-7a89f9c626e337ba6528d8a2829b228c933877fb.tar.gz lwn-7a89f9c626e337ba6528d8a2829b228c933877fb.zip |
xprtrdma: Honor ->send_request API contract
Commit c93c62231cf5 ("xprtrdma: Disconnect on registration failure")
added a disconnect for some RPC marshaling failures. This is needed
only in a handful of cases, but it was triggering for simple stuff
like temporary resource shortages. Try to straighten this out.
Fix up the lower layers so they don't return -ENOMEM or other error
codes that the RPC client's FSM doesn't explicitly recognize.
Also fix up the places in the send_request path that do want a
disconnect. For example, when ib_post_send or ib_post_recv fails,
this is a sign that there is a send or receive queue resource
miscalculation. That should be rare, and is a sign of a software
bug. But xprtrdma can recover: disconnect to reset the transport and
start over.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net/sunrpc/xprtrdma/frwr_ops.c')
-rw-r--r-- | net/sunrpc/xprtrdma/frwr_ops.c | 13 |
1 files changed, 7 insertions, 6 deletions
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index fc2826b3518c..d7613db9185d 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -382,7 +382,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 			rpcrdma_defer_mr_recovery(mw);
 		mw = rpcrdma_get_mw(r_xprt);
 		if (!mw)
-			return -ENOMEM;
+			return -ENOBUFS;
 	} while (mw->frmr.fr_state != FRMR_IS_INVALID);
 	frmr = &mw->frmr;
 	frmr->fr_state = FRMR_IS_VALID;
@@ -456,18 +456,18 @@ out_dmamap_err:
 	pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
 	       mw->mw_sg, mw->mw_nents);
 	rpcrdma_defer_mr_recovery(mw);
-	return -ENOMEM;
+	return -EIO;

 out_mapmr_err:
 	pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
 	       frmr->fr_mr, n, mw->mw_nents);
-	rc = n < 0 ? n : -EIO;
 	rpcrdma_defer_mr_recovery(mw);
-	return rc;
+	return -EIO;

 out_senderr:
+	pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
 	rpcrdma_defer_mr_recovery(mw);
-	return rc;
+	return -ENOTCONN;
 }

 static struct ib_send_wr *
@@ -569,7 +569,8 @@ unmap:
 	return;

 reset_mrs:
-	pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
+	pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
+	rdma_disconnect(ia->ri_id);

 	/* Find and reset the MRs in the LOCAL_INV WRs that did not
 	 * get posted. This is synchronous, and slow.