diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-18 14:32:33 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-18 14:32:33 -0700 |
commit | 6860c981b9672324cb53b883cfda8d2ea1445ff1 (patch) | |
tree | d97a47712479bc2f886012d7f2da8b00ac504f3a /net/sunrpc/xprtrdma/verbs.c | |
parent | 0570bc8b7c9b41deba6f61ac218922e7168ad648 (diff) | |
parent | d5b9216fd5114be4ed98ca9c1ecc5f164cd8cf5e (diff) | |
download | lwn-6860c981b9672324cb53b883cfda8d2ea1445ff1.tar.gz lwn-6860c981b9672324cb53b883cfda8d2ea1445ff1.zip |
Merge tag 'nfs-for-5.3-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Stable fixes:
- SUNRPC: Ensure bvecs are re-synced when we re-encode the RPC
request
- Fix an Oops in ff_layout_track_ds_error due to a PTR_ERR()
dereference
- Revert buggy NFS readdirplus optimisation
- NFSv4: Handle the special Linux file open access mode
- pnfs: Fix a problem where we gratuitously start doing I/O through
the MDS
Features:
- Allow NFS client to set up multiple TCP connections to the server
using a new 'nconnect=X' mount option. Queue length is used to
balance load.
- Enhance statistics reporting to report on all transports when using
multiple connections.
- Speed up SUNRPC by removing bh-safe spinlocks
- Add a mechanism to allow NFSv4 to request that containers set a
unique per-host identifier for when the hostname is not set.
- Ensure NFSv4 updates the lease_time after a clientid update
Bugfixes and cleanup:
- Fix use-after-free in rpcrdma_post_recvs
- Fix a memory leak when nfs_match_client() is interrupted
- Fix buggy file access checking in NFSv4 open for execute
- disable unsupported client side deduplication
- Fix spurious client disconnections
- Fix occasional RDMA transport deadlock
- Various RDMA cleanups
- Various tracepoint fixes
- Fix the TCP callback channel to guarantee the server can actually
send the number of callback requests that was negotiated at mount
time"
* tag 'nfs-for-5.3-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (68 commits)
pnfs/flexfiles: Add tracepoints for detecting pnfs fallback to MDS
pnfs: Fix a problem where we gratuitously start doing I/O through the MDS
SUNRPC: Optimise transport balancing code
SUNRPC: Ensure the bvecs are reset when we re-encode the RPC request
pnfs/flexfiles: Fix PTR_ERR() dereferences in ff_layout_track_ds_error
NFSv4: Don't use the zero stateid with layoutget
SUNRPC: Fix up backchannel slot table accounting
SUNRPC: Fix initialisation of struct rpc_xprt_switch
SUNRPC: Skip zero-refcount transports
SUNRPC: Replace division by multiplication in calculation of queue length
NFSv4: Validate the stateid before applying it to state recovery
nfs4.0: Refetch lease_time after clientid update
nfs4: Rename nfs41_setup_state_renewal
nfs4: Make nfs4_proc_get_lease_time available for nfs4.0
nfs: Fix copy-and-paste error in debug message
NFS: Replace 16 seq_printf() calls by seq_puts()
NFS: Use seq_putc() in nfs_show_stats()
Revert "NFS: readdirplus optimization by cache mechanism" (memleak)
SUNRPC: Fix transport accounting when caller specifies an rpc_xprt
NFS: Record task, client ID, and XID in xdr_status trace points
...
Diffstat (limited to 'net/sunrpc/xprtrdma/verbs.c')
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 115 |
1 files changed, 51 insertions, 64 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 84bb37924540..805b1f35e1ca 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -89,14 +89,12 @@ static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); */ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ia *ia = &r_xprt->rx_ia; /* Flush Receives, then wait for deferred Reply work * to complete. */ ib_drain_rq(ia->ri_id->qp); - drain_workqueue(buf->rb_completion_wq); /* Deferred Reply processing might have scheduled * local invalidations. @@ -901,7 +899,7 @@ out_emptyq: * completions recently. This is a sign the Send Queue is * backing up. Cause the caller to pause and try again. */ - set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags); + xprt_wait_for_buffer_space(&r_xprt->rx_xprt); r_xprt->rx_stats.empty_sendctx_q++; return NULL; } @@ -936,10 +934,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) /* Paired with READ_ONCE */ smp_store_release(&buf->rb_sc_tail, next_tail); - if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) { - smp_mb__after_atomic(); - xprt_write_space(&sc->sc_xprt->rx_xprt); - } + xprt_write_space(&sc->sc_xprt->rx_xprt); } static void @@ -977,8 +972,6 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) r_xprt->rx_stats.mrs_allocated += count; spin_unlock(&buf->rb_mrlock); trace_xprtrdma_createmrs(r_xprt, count); - - xprt_write_space(&r_xprt->rx_xprt); } static void @@ -990,6 +983,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work) rx_buf); rpcrdma_mrs_create(r_xprt); + xprt_write_space(&r_xprt->rx_xprt); } /** @@ -1025,7 +1019,6 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, if (!req->rl_recvbuf) goto out4; - req->rl_buffer = buffer; INIT_LIST_HEAD(&req->rl_registered); spin_lock(&buffer->rb_lock); list_add(&req->rl_all, &buffer->rb_allreqs); @@ -1042,9 +1035,9 @@ out1: return NULL; } -static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp) +static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, + bool temp) { - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; rep = kzalloc(sizeof(*rep), GFP_KERNEL); @@ -1055,27 +1048,22 @@ static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp) DMA_FROM_DEVICE, GFP_KERNEL); if (!rep->rr_rdmabuf) goto out_free; + xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), rdmab_length(rep->rr_rdmabuf)); - rep->rr_cqe.done = rpcrdma_wc_receive; rep->rr_rxprt = r_xprt; - INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion); rep->rr_recv_wr.next = NULL; rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; rep->rr_recv_wr.num_sge = 1; rep->rr_temp = temp; - - spin_lock(&buf->rb_lock); - list_add(&rep->rr_list, &buf->rb_recv_bufs); - spin_unlock(&buf->rb_lock); - return true; + return rep; out_free: kfree(rep); out: - return false; + return NULL; } /** @@ -1089,7 +1077,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; int i, rc; - buf->rb_flags = 0; buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests; buf->rb_bc_srv_max_requests = 0; spin_lock_init(&buf->rb_mrlock); @@ -1122,15 +1109,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) if (rc) goto out; - buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s", - WQ_MEM_RECLAIM | WQ_HIGHPRI, - 0, - r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]); - if (!buf->rb_completion_wq) { - rc = -ENOMEM; - goto out; - } - return 0; out: rpcrdma_buffer_destroy(buf); @@ -1204,11 +1182,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { cancel_delayed_work_sync(&buf->rb_refresh_worker); - if (buf->rb_completion_wq) { - destroy_workqueue(buf->rb_completion_wq); - buf->rb_completion_wq = NULL; - } - rpcrdma_sendctxs_destroy(buf); while (!list_empty(&buf->rb_recv_bufs)) { @@ -1325,13 +1298,12 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) /** * rpcrdma_buffer_put - Put request/reply buffers back into pool + * @buffers: buffer pool * @req: object to return * */ -void -rpcrdma_buffer_put(struct rpcrdma_req *req) +void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) { - struct rpcrdma_buffer *buffers = req->rl_buffer; struct rpcrdma_rep *rep = req->rl_reply; req->rl_reply = NULL; @@ -1484,8 +1456,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; int rc; - if (!ep->rep_send_count || - test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { + if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) { send_wr->send_flags |= IB_SEND_SIGNALED; ep->rep_send_count = ep->rep_send_batch; } else { @@ -1505,11 +1476,13 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct ib_recv_wr *wr, *bad_wr; + struct ib_recv_wr *i, *wr, *bad_wr; + struct rpcrdma_rep *rep; int needed, count, rc; rc = 0; count = 0; + needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); if (ep->rep_receive_count > needed) goto out; @@ -1517,51 +1490,65 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) if (!temp) needed += RPCRDMA_MAX_RECV_BATCH; - count = 0; + /* fast path: all needed reps can be found on the free list */ wr = NULL; + spin_lock(&buf->rb_lock); while (needed) { - struct rpcrdma_regbuf *rb; - struct rpcrdma_rep *rep; - - spin_lock(&buf->rb_lock); rep = list_first_entry_or_null(&buf->rb_recv_bufs, struct rpcrdma_rep, rr_list); - if (likely(rep)) - list_del(&rep->rr_list); - spin_unlock(&buf->rb_lock); - if (!rep) { - if (!rpcrdma_rep_create(r_xprt, temp)) - break; - continue; - } + if (!rep) + break; - rb = rep->rr_rdmabuf; - if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) { - rpcrdma_recv_buffer_put(rep); + list_del(&rep->rr_list); + rep->rr_recv_wr.next = wr; + wr = &rep->rr_recv_wr; + --needed; + } + spin_unlock(&buf->rb_lock); + + while (needed) { + rep = rpcrdma_rep_create(r_xprt, temp); + if (!rep) break; - } - trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); rep->rr_recv_wr.next = wr; wr = &rep->rr_recv_wr; - ++count; --needed; } - if (!count) + if (!wr) goto out; + for (i = wr; i; i = i->next) { + rep = container_of(i, struct rpcrdma_rep, rr_recv_wr); + + if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) + goto release_wrs; + + trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); + ++count; + } + rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, (const struct ib_recv_wr **)&bad_wr); +out: + trace_xprtrdma_post_recvs(r_xprt, count, rc); if (rc) { - for (wr = bad_wr; wr; wr = wr->next) { + for (wr = bad_wr; wr;) { struct rpcrdma_rep *rep; rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr); + wr = wr->next; rpcrdma_recv_buffer_put(rep); --count; } } ep->rep_receive_count += count; -out: - trace_xprtrdma_post_recvs(r_xprt, count, rc); + return; + +release_wrs: + for (i = wr; i;) { + rep = container_of(i, struct rpcrdma_rep, rr_recv_wr); + i = i->next; + rpcrdma_recv_buffer_put(rep); + } } |