author    Bob Pearson <rpearsonhpe@gmail.com>    2023-04-04 23:26:11 -0500
committer Jason Gunthorpe <jgg@nvidia.com>    2023-04-17 16:34:04 -0300
commit    f605f26ea196a3b49bea249330cbd18dba61a33e (patch)
tree      fc7bb3a98ec7549d60e7be5ae6db058ad219bb4a /drivers/infiniband/sw/rxe/rxe_resp.c
parent    7b560b89a08d35c23dfc95dc44aee10651c8b9a0 (diff)
RDMA/rxe: Protect QP state with qp->state_lock
Currently the rxe driver makes little effort to make the changes to qp state (which includes qp->attr.qp_state, qp->attr.sq_draining and qp->valid) atomic between different client threads and IO threads. In particular a common template is for an RDMA application to call ib_modify_qp() to move a qp to ERR state and then wait until all the packet and work queues have drained before calling ib_destroy_qp(). None of these state changes are protected by locks to assure that the changes are executed atomically and that memory barriers are included. This has been observed to lead to incorrect behavior around qp cleanup.

This patch continues the work of the previous patches in this series and adds locking code around qp state changes and lookups.

Link: https://lore.kernel.org/r/20230405042611.6467-5-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
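
For context, the teardown template described in the commit message looks roughly like the sketch below. This is an illustrative, kernel-ULP-style example only: the helper name example_qp_teardown() and the way the caller waits for the queues to drain are assumptions, not part of this patch.

#include <rdma/ib_verbs.h>

/* Illustrative sketch of the teardown template described in the commit
 * message. example_qp_teardown() is a hypothetical helper; real consumers
 * track completions their own way and check return codes.
 */
static void example_qp_teardown(struct ib_qp *qp)
{
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };

	/* Move the QP to ERR so outstanding work requests are flushed. */
	ib_modify_qp(qp, &attr, IB_QP_STATE);

	/*
	 * Wait until the packet and work queues have drained (mechanism is
	 * ULP-specific and omitted here). Without the locking added by this
	 * series, this state change races with IO threads that read
	 * qp->attr.qp_state and qp->valid.
	 */

	ib_destroy_qp(qp);
}

The races this patch closes are between such caller-driven transitions to ERR and the driver's IO threads (for example rxe_responder() below) reading the state; the diff wraps those reads in qp->state_lock.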
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_resp.c')
-rw-r--r--	drivers/infiniband/sw/rxe/rxe_resp.c	12
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 67eac616235c..68f6cd188d8e 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -1137,8 +1137,13 @@ static enum resp_states do_complete(struct rxe_qp *qp,
 		return RESPST_ERR_CQ_OVERFLOW;
 
 finish:
-	if (unlikely(qp_state(qp) == IB_QPS_ERR))
+	spin_lock_bh(&qp->state_lock);
+	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
+		spin_unlock_bh(&qp->state_lock);
 		return RESPST_CHK_RESOURCE;
+	}
+	spin_unlock_bh(&qp->state_lock);
+
 	if (unlikely(!pkt))
 		return RESPST_DONE;
 	if (qp_type(qp) == IB_QPT_RC)
@@ -1464,14 +1469,17 @@ int rxe_responder(struct rxe_qp *qp)
 	struct rxe_pkt_info *pkt = NULL;
 	int ret;
 
+	spin_lock_bh(&qp->state_lock);
 	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
-	    qp_state(qp) == IB_QPS_RESET) {
+			  qp_state(qp) == IB_QPS_RESET) {
 		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
 
 		drain_req_pkts(qp);
 		flush_recv_queue(qp, notify);
+		spin_unlock_bh(&qp->state_lock);
 		goto exit;
 	}
+	spin_unlock_bh(&qp->state_lock);
 
 	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;