diff options
author | Max Gurtovoy <maxg@mellanox.com> | 2019-06-11 18:52:56 +0300 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2019-06-24 11:49:28 -0300 |
commit | 2563e2f30acb4c914fc475331e476fa920eb4245 (patch) | |
tree | 8bf0e8f9fc409fa7e0f20859b581ca3ce08919f2 /drivers/infiniband/hw/mlx5/mr.c | |
parent | de0ae958deb5e6af35c4c6a4679d4fe9896a98ca (diff) | |
download | lwn-2563e2f30acb4c914fc475331e476fa920eb4245.tar.gz lwn-2563e2f30acb4c914fc475331e476fa920eb4245.zip |
RDMA/mlx5: Use PA mapping for PI handover
If possible, avoid doing a UMR operation to register data and protection
buffers (via MTT/KLM mkeys). Instead, use the local DMA key and map the
SG lists using PA access. This is safe, since the internal key for data
and protection is never exposed to the remote server (only signature key
might be exposed). If PA mappings are not possible, perform mapping
using MTT/KLM descriptors.
The setup of the tested benchmark (using iSER ULP):
- 2 servers with 24 cores (1 initiator and 1 target)
- ConnectX-4/ConnectX-5 adapters
- 24 target sessions with 1 LUN each
- ramdisk backstore
- PI active
Performance results running fio (24 jobs, 128 iodepth) using
write_generate=1 and read_verify=1 (with/without patch):
bs IOPS(read) IOPS(write)
---- ---------- ----------
512 1266.4K/1262.4K 1720.1K/1732.1K
4k 793139/570902 1129.6K/773982
32k 72660/72086 97229/96164
Using write_generate=0 and read_verify=0 (with/without patch):
bs IOPS(read) IOPS(write)
---- ---------- ----------
512 1590.2K/1600.1K 1828.2K/1830.3K
4k 1078.1K/937272 1142.1K/815304
32k 77012/77369 98125/97435
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx5/mr.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 63 |
1 files changed, 59 insertions, 4 deletions
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8b40abd0070b..f2ef89e48afa 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -2002,6 +2002,40 @@ done: } static int +mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + unsigned int sg_offset = 0; + int n = 0; + + mr->meta_length = 0; + if (data_sg_nents == 1) { + n++; + mr->ndescs = 1; + if (data_sg_offset) + sg_offset = *data_sg_offset; + mr->data_length = sg_dma_len(data_sg) - sg_offset; + mr->data_iova = sg_dma_address(data_sg) + sg_offset; + if (meta_sg_nents == 1) { + n++; + mr->meta_ndescs = 1; + if (meta_sg_offset) + sg_offset = *meta_sg_offset; + else + sg_offset = 0; + mr->meta_length = sg_dma_len(meta_sg) - sg_offset; + mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; + } + ibmr->length = mr->data_length + mr->meta_length; + } + + return n; +} + +static int mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, struct scatterlist *sgl, unsigned short sg_nents, @@ -2099,7 +2133,6 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, struct mlx5_ib_mr *mr = to_mmr(ibmr); struct mlx5_ib_mr *pi_mr = mr->mtt_mr; int n; - u64 iova; pi_mr->ndescs = 0; pi_mr->meta_ndescs = 0; @@ -2115,13 +2148,14 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, if (n != data_sg_nents) return n; - iova = pi_mr->ibmr.iova; + pi_mr->data_iova = pi_mr->ibmr.iova; pi_mr->data_length = pi_mr->ibmr.length; pi_mr->ibmr.length = pi_mr->data_length; ibmr->length = pi_mr->data_length; if (meta_sg_nents) { u64 page_mask = ~((u64)ibmr->page_size - 1); + u64 iova = pi_mr->data_iova; n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents, meta_sg_offset, mlx5_set_page_pi); @@ -2181,6 +2215,7 @@ mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, 
struct scatterlist *data_sg, DMA_TO_DEVICE); /* This is zero-based memory region */ + pi_mr->data_iova = 0; pi_mr->ibmr.iova = 0; pi_mr->pi_iova = pi_mr->data_length; ibmr->length = pi_mr->ibmr.length; @@ -2194,11 +2229,27 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, unsigned int *meta_sg_offset) { struct mlx5_ib_mr *mr = to_mmr(ibmr); - struct mlx5_ib_mr *pi_mr = mr->mtt_mr; + struct mlx5_ib_mr *pi_mr = NULL; int n; WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); + mr->ndescs = 0; + mr->data_length = 0; + mr->data_iova = 0; + mr->meta_ndescs = 0; + mr->pi_iova = 0; + /* + * As a performance optimization, if possible, there is no need to + * perform UMR operation to register the data/metadata buffers. + * First try to map the sg lists to PA descriptors with local_dma_lkey. + * Fallback to UMR only in case of a failure. + */ + n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); + if (n == data_sg_nents + meta_sg_nents) + goto out; /* * As a performance optimization, if possible, there is no need to map * the sg lists to KLM descriptors. First try to map the sg lists to MTT @@ -2207,6 +2258,7 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, * (especially in high load). * Use KLM (indirect access) only if it's mandatory. */ + pi_mr = mr->mtt_mr; n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, data_sg_offset, meta_sg, meta_sg_nents, meta_sg_offset); @@ -2224,7 +2276,10 @@ out: /* This is zero-based memory region */ ibmr->iova = 0; mr->pi_mr = pi_mr; - ibmr->sig_attrs->meta_length = pi_mr->meta_length; + if (pi_mr) + ibmr->sig_attrs->meta_length = pi_mr->meta_length; + else + ibmr->sig_attrs->meta_length = mr->meta_length; return 0; } |