summary refs log tree commit diff
path: root/drivers/infiniband/sw
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-04-20 11:20:35 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-04-20 11:20:35 -0700
commit 4b0b946019e7376752456380b67e54eea2f10a7c (patch)
tree 813f922af517b8624d6c780f9be0d1d041734e92 /drivers/infiniband/sw
parent a5d1079c28a5bc6caa30ef4099ef04ed17d2c6aa (diff)
parent 9091e3b59f2bef11c0a841096327565ae0ca220b (diff)
download lwn-4b0b946019e7376752456380b67e54eea2f10a7c.tar.gz
lwn-4b0b946019e7376752456380b67e54eea2f10a7c.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "The usual collection of driver changes, more core infrastructure updates than typical this cycle: - Minor cleanups and kernel-doc fixes in bnxt_re, hns, rdmavt, efa, ocrdma, erdma, rtrs, hfi1, ionic, and pvrdma - New udata validation framework and driver updates - Modernize CQ creation interface in mlx4 and mlx5, manage CQ umem in core - Promote UMEM to a core component, split out DMA block iterator logic - Introduce FRMR pools with aging, statistics, pinned handles, and netlink control and use it in mlx5 - Add PCIe TLP emulation support in mlx5 - Extend umem to work with revocable pinned dmabufs and use it in irdma - More net namespace improvements for rxe - GEN4 hardware support in irdma - First steps to MW and UC support in mana_ib - Support for CQ umem and doorbells in bnxt_re - Drop opa_vnic driver from hfi1 Fixes: - IB/core zero dmac neighbor resolution race - GID table memory free - rxe pad/ICRC validation and r_key async errors - mlx4 external umem for CQ - umem DMA attributes on unmap - mana_ib RX steering on RSS QP destroy" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (116 commits) RDMA/core: Fix user CQ creation for drivers without create_cq RDMA/ionic: bound node_desc sysfs read with %.64s IB/core: Fix zero dmac race in neighbor resolution RDMA/mana_ib: Support memory windows RDMA/rxe: Validate pad and ICRC before payload_size() in rxe_rcv RDMA/core: Prefer NLA_NUL_STRING RDMA/core: Fix memory free for GID table RDMA/hns: Remove the duplicate calls to ib_copy_validate_udata_in() RDMA: Remove redundant = {} for udata req structs RDMA/irdma: Add missing comp_mask check in alloc_ucontext RDMA/hns: Add missing comp_mask check in create_qp RDMA/mlx5: Pull comp_mask validation into ib_copy_validate_udata_in_cm() RDMA: Use ib_copy_validate_udata_in_cm() for zero comp_mask RDMA/hns: Use ib_copy_validate_udata_in() RDMA/mlx4: Use ib_copy_validate_udata_in() for QP RDMA/mlx4: Use 
ib_copy_validate_udata_in() RDMA/mlx5: Use ib_copy_validate_udata_in() for MW RDMA/mlx5: Use ib_copy_validate_udata_in() for SRQ RDMA/pvrdma: Use ib_copy_validate_udata_in() for srq RDMA: Use ib_copy_validate_udata_in() for implicit full structs ...
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r-- drivers/infiniband/sw/rdmavt/cq.c 4
-rw-r--r-- drivers/infiniband/sw/rdmavt/cq.h 2
-rw-r--r-- drivers/infiniband/sw/rdmavt/mcast.c 1
-rw-r--r-- drivers/infiniband/sw/rdmavt/mmap.c 22
-rw-r--r-- drivers/infiniband/sw/rdmavt/qp.c 2
-rw-r--r-- drivers/infiniband/sw/rdmavt/vt.c 10
-rw-r--r-- drivers/infiniband/sw/rxe/Makefile 3
-rw-r--r-- drivers/infiniband/sw/rxe/rxe.c 38
-rw-r--r-- drivers/infiniband/sw/rxe/rxe.h 2
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_cq.c 31
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_loc.h 3
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_net.c 144
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_net.h 9
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_ns.c 124
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_ns.h 26
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_odp.c 2
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_recv.c 3
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_resp.c 56
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_task.c 2
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_verbs.c 33
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_verbs.h 1
-rw-r--r-- drivers/infiniband/sw/siw/siw_verbs.c 6
22 files changed, 388 insertions, 136 deletions
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index e7835ca70e2b..30904c6ae852 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -337,7 +337,7 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
*
* Return: 0 for success.
*/
-int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+int rvt_resize_cq(struct ib_cq *ibcq, unsigned int cqe, struct ib_udata *udata)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
u32 head, tail, n;
@@ -349,7 +349,7 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
struct rvt_k_cq_wc *k_wc = NULL;
struct rvt_k_cq_wc *old_k_wc = NULL;
- if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
+ if (cqe > rdi->dparms.props.max_cqe)
return -EINVAL;
/*
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
index 4028702a7b2f..82c902c98c8e 100644
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -13,7 +13,7 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs);
int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+int rvt_resize_cq(struct ib_cq *ibcq, unsigned int cqe, struct ib_udata *udata);
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
int rvt_driver_cq_init(void);
void rvt_cq_exit(void);
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 1fda344d2056..b41fe4c069dd 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -49,7 +49,6 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
{
struct rvt_qp *qp = mqp->qp;
- /* Notify hfi1_destroy_qp() if it is waiting. */
rvt_put_qp(qp);
kfree(mqp);
diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c
index 46e3b3e0643a..473f464f33fa 100644
--- a/drivers/infiniband/sw/rdmavt/mmap.c
+++ b/drivers/infiniband/sw/rdmavt/mmap.c
@@ -9,6 +9,11 @@
#include <rdma/uverbs_ioctl.h>
#include "mmap.h"
+/* number of reserved mmaps for the driver */
+#define MMAP_RESERVED 256
+/* start point for dynamic offsets */
+#define MMAP_OFFSET_START (MMAP_RESERVED * PAGE_SIZE)
+
/**
* rvt_mmap_init - init link list and lock for mem map
* @rdi: rvt dev struct
@@ -17,7 +22,7 @@ void rvt_mmap_init(struct rvt_dev_info *rdi)
{
INIT_LIST_HEAD(&rdi->pending_mmaps);
spin_lock_init(&rdi->pending_lock);
- rdi->mmap_offset = PAGE_SIZE;
+ rdi->mmap_offset = MMAP_OFFSET_START;
spin_lock_init(&rdi->mmap_offset_lock);
}
@@ -73,6 +78,13 @@ int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct rvt_mmap_info *ip, *pp;
int ret = -EINVAL;
+ /* call driver if in reserved range */
+ if (offset < MMAP_OFFSET_START) {
+ if (rdi->driver_f.mmap)
+ return rdi->driver_f.mmap(context, vma);
+ return -EINVAL;
+ }
+
/*
* Search the device's list of objects waiting for a mmap call.
* Normally, this list is very short since a call to create a
@@ -129,9 +141,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size,
spin_lock_irq(&rdi->mmap_offset_lock);
if (rdi->mmap_offset == 0)
- rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
+ rdi->mmap_offset = MMAP_OFFSET_START;
ip->offset = rdi->mmap_offset;
- rdi->mmap_offset += ALIGN(size, SHMLBA);
+ rdi->mmap_offset += PAGE_SIZE;
spin_unlock_irq(&rdi->mmap_offset_lock);
INIT_LIST_HEAD(&ip->pending_mmaps);
@@ -159,9 +171,9 @@ void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip,
spin_lock_irq(&rdi->mmap_offset_lock);
if (rdi->mmap_offset == 0)
- rdi->mmap_offset = PAGE_SIZE;
+ rdi->mmap_offset = MMAP_OFFSET_START;
ip->offset = rdi->mmap_offset;
- rdi->mmap_offset += size;
+ rdi->mmap_offset += PAGE_SIZE;
spin_unlock_irq(&rdi->mmap_offset_lock);
ip->size = size;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 3c7ee7ddc5dd..816624e0991a 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -2705,7 +2705,7 @@ int rvt_qp_iter_next(struct rvt_qp_iter *iter)
struct rvt_ibport *rvp;
int pidx;
- pidx = n % rdi->ibdev.phys_port_cnt;
+ pidx = n / 2; /* QP0 and QP1 */
rvp = rdi->ports[pidx];
qp = rcu_dereference(rvp->qp[n & 1]);
} else {
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 0c28b412d81a..40aa64208364 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -244,6 +244,10 @@ static int rvt_query_gid(struct ib_device *ibdev, u32 port_num,
*/
static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
+ struct rvt_dev_info *rdi = ib_to_rvt(uctx->device);
+
+ if (rdi->driver_f.alloc_ucontext)
+ return rdi->driver_f.alloc_ucontext(uctx, udata);
return 0;
}
@@ -253,6 +257,10 @@ static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
*/
static void rvt_dealloc_ucontext(struct ib_ucontext *context)
{
+ struct rvt_dev_info *rdi = ib_to_rvt(context->device);
+
+ if (rdi->driver_f.dealloc_ucontext)
+ rdi->driver_f.dealloc_ucontext(context);
return;
}
@@ -367,7 +375,7 @@ static const struct ib_device_ops rvt_dev_ops = {
.query_srq = rvt_query_srq,
.reg_user_mr = rvt_reg_user_mr,
.req_notify_cq = rvt_req_notify_cq,
- .resize_cq = rvt_resize_cq,
+ .resize_user_cq = rvt_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, rvt_cq, ibcq),
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 93134f1d1d0c..3977f4f13258 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -22,6 +22,7 @@ rdma_rxe-y := \
rxe_mcast.o \
rxe_task.o \
rxe_net.o \
- rxe_hw_counters.o
+ rxe_hw_counters.o \
+ rxe_ns.o
rdma_rxe-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += rxe_odp.o
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index e891199cbdef..b0714f9abe3d 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -8,6 +8,8 @@
#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
+#include "rxe_net.h"
+#include "rxe_ns.h"
MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
MODULE_DESCRIPTION("Soft RDMA transport");
@@ -200,6 +202,8 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
port->mtu_cap = ib_mtu_enum_to_int(mtu);
}
+static struct rdma_link_ops rxe_link_ops;
+
/* called by ifc layer to create new rxe device.
* The caller should allocate memory for rxe by calling ib_alloc_device.
*/
@@ -208,6 +212,7 @@ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name,
{
rxe_init(rxe, ndev);
rxe_set_mtu(rxe, mtu);
+ rxe->ib_dev.link_ops = &rxe_link_ops;
return rxe_register_device(rxe, ibdev_name, ndev);
}
@@ -231,6 +236,10 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
goto err;
}
+ err = rxe_net_init(ndev);
+ if (err)
+ return err;
+
err = rxe_net_add(ibdev_name, ndev);
if (err) {
rxe_err("failed to add %s\n", ndev->name);
@@ -240,9 +249,17 @@ err:
return err;
}
+static int rxe_dellink(struct ib_device *dev)
+{
+ rxe_net_del(dev);
+
+ return 0;
+}
+
static struct rdma_link_ops rxe_link_ops = {
.type = "rxe",
.newlink = rxe_newlink,
+ .dellink = rxe_dellink,
};
static int __init rxe_module_init(void)
@@ -253,15 +270,24 @@ static int __init rxe_module_init(void)
if (err)
return err;
- err = rxe_net_init();
- if (err) {
- rxe_destroy_wq();
- return err;
- }
+ err = rxe_namespace_init();
+ if (err)
+ goto err_destroy_wq;
+
+ err = rxe_register_notifier();
+ if (err)
+ goto err_namespace_exit;
rdma_link_register(&rxe_link_ops);
+
pr_info("loaded\n");
return 0;
+
+err_namespace_exit:
+ rxe_namespace_exit();
+err_destroy_wq:
+ rxe_destroy_wq();
+ return err;
}
static void __exit rxe_module_exit(void)
@@ -271,6 +297,8 @@ static void __exit rxe_module_exit(void)
rxe_net_exit();
rxe_destroy_wq();
+ rxe_namespace_exit();
+
pr_info("unloaded\n");
}
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index ff8cd53f5f28..c56bae376c7f 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -121,4 +121,6 @@ void rxe_port_up(struct rxe_dev *rxe);
void rxe_port_down(struct rxe_dev *rxe);
void rxe_set_port_state(struct rxe_dev *rxe);
+extern struct workqueue_struct *rxe_wq;
+
#endif /* RXE_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index fffd144d509e..eaf7802a5cbe 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -8,37 +8,6 @@
#include "rxe_loc.h"
#include "rxe_queue.h"
-int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
- int cqe, int comp_vector)
-{
- int count;
-
- if (cqe <= 0) {
- rxe_dbg_dev(rxe, "cqe(%d) <= 0\n", cqe);
- goto err1;
- }
-
- if (cqe > rxe->attr.max_cqe) {
- rxe_dbg_dev(rxe, "cqe(%d) > max_cqe(%d)\n",
- cqe, rxe->attr.max_cqe);
- goto err1;
- }
-
- if (cq) {
- count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
- if (cqe < count) {
- rxe_dbg_cq(cq, "cqe(%d) < current # elements in queue (%d)\n",
- cqe, count);
- goto err1;
- }
- }
-
- return 0;
-
-err1:
- return -EINVAL;
-}
-
int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int comp_vector, struct ib_udata *udata,
struct rxe_create_cq_resp __user *uresp)
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 7992290886e1..e095c12699cb 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -18,9 +18,6 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp);
/* rxe_cq.c */
-int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
- int cqe, int comp_vector);
-
int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int comp_vector, struct ib_udata *udata,
struct rxe_create_cq_resp __user *uresp);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index cbc646a30003..50a2cb5405e2 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -17,8 +17,11 @@
#include "rxe.h"
#include "rxe_net.h"
#include "rxe_loc.h"
+#include "rxe_ns.h"
-static struct rxe_recv_sockets recv_sockets;
+#ifndef SK_REF_FOR_TUNNEL
+#define SK_REF_FOR_TUNNEL 2
+#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
@@ -101,20 +104,20 @@ static inline void rxe_reclassify_recv_socket(struct socket *sock)
}
static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
+ struct net *net,
struct net_device *ndev,
struct in_addr *saddr,
struct in_addr *daddr)
{
struct rtable *rt;
- struct flowi4 fl = { { 0 } };
+ struct flowi4 fl = {};
- memset(&fl, 0, sizeof(fl));
fl.flowi4_oif = ndev->ifindex;
memcpy(&fl.saddr, saddr, sizeof(*saddr));
memcpy(&fl.daddr, daddr, sizeof(*daddr));
fl.flowi4_proto = IPPROTO_UDP;
- rt = ip_route_output_key(&init_net, &fl);
+ rt = ip_route_output_key(net, &fl);
if (IS_ERR(rt)) {
rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr);
return NULL;
@@ -125,22 +128,20 @@ static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
+ struct net *net,
struct net_device *ndev,
struct in6_addr *saddr,
struct in6_addr *daddr)
{
struct dst_entry *ndst;
- struct flowi6 fl6 = { { 0 } };
+ struct flowi6 fl6 = {};
- memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = ndev->ifindex;
memcpy(&fl6.saddr, saddr, sizeof(*saddr));
memcpy(&fl6.daddr, daddr, sizeof(*daddr));
fl6.flowi6_proto = IPPROTO_UDP;
- ndst = ip6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
- recv_sockets.sk6->sk, &fl6,
- NULL);
+ ndst = ip6_dst_lookup_flow(net, rxe_ns_pernet_sk6(net), &fl6, NULL);
if (IS_ERR(ndst)) {
rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
return NULL;
@@ -160,6 +161,7 @@ put:
#else
static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
+ struct net *net,
struct net_device *ndev,
struct in6_addr *saddr,
struct in6_addr *daddr)
@@ -174,6 +176,7 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
struct rxe_av *av)
{
struct dst_entry *dst = NULL;
+ struct net *net;
if (qp_type(qp) == IB_QPT_RC)
dst = sk_dst_get(qp->sk->sk);
@@ -182,20 +185,22 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
if (dst)
dst_release(dst);
+ net = dev_net(ndev);
+
if (av->network_type == RXE_NETWORK_TYPE_IPV4) {
struct in_addr *saddr;
struct in_addr *daddr;
saddr = &av->sgid_addr._sockaddr_in.sin_addr;
daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- dst = rxe_find_route4(qp, ndev, saddr, daddr);
+ dst = rxe_find_route4(qp, net, ndev, saddr, daddr);
} else if (av->network_type == RXE_NETWORK_TYPE_IPV6) {
struct in6_addr *saddr6;
struct in6_addr *daddr6;
saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
- dst = rxe_find_route6(qp, ndev, saddr6, daddr6);
+ dst = rxe_find_route6(qp, net, ndev, saddr6, daddr6);
#if IS_ENABLED(CONFIG_IPV6)
if (dst)
qp->dst_cookie =
@@ -624,6 +629,43 @@ int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
return 0;
}
+static void rxe_sock_put(struct sock *sk,
+ void (*set_sk)(struct net *, struct sock *),
+ struct net *net)
+{
+ if (refcount_read(&sk->sk_refcnt) > SK_REF_FOR_TUNNEL) {
+ __sock_put(sk);
+ } else {
+ rxe_release_udp_tunnel(sk->sk_socket);
+ sk = NULL;
+ set_sk(net, sk);
+ }
+}
+
+void rxe_net_del(struct ib_device *dev)
+{
+ struct rxe_dev *rxe = container_of(dev, struct rxe_dev, ib_dev);
+ struct net_device *ndev;
+ struct sock *sk;
+ struct net *net;
+
+ ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+ if (!ndev)
+ return;
+
+ net = dev_net(ndev);
+
+ sk = rxe_ns_pernet_sk4(net);
+ if (sk)
+ rxe_sock_put(sk, rxe_ns_pernet_set_sk4, net);
+
+ sk = rxe_ns_pernet_sk6(net);
+ if (sk)
+ rxe_sock_put(sk, rxe_ns_pernet_set_sk6, net);
+
+ dev_put(ndev);
+}
+
static void rxe_port_event(struct rxe_dev *rxe,
enum ib_event_type event)
{
@@ -680,6 +722,7 @@ static int rxe_notify(struct notifier_block *not_blk,
switch (event) {
case NETDEV_UNREGISTER:
ib_unregister_device_queued(&rxe->ib_dev);
+ rxe_net_del(&rxe->ib_dev);
break;
case NETDEV_CHANGEMTU:
rxe_dbg_dev(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
@@ -709,66 +752,97 @@ static struct notifier_block rxe_net_notifier = {
.notifier_call = rxe_notify,
};
-static int rxe_net_ipv4_init(void)
+static int rxe_net_ipv4_init(struct net *net)
{
- recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
- htons(ROCE_V2_UDP_DPORT), false);
- if (IS_ERR(recv_sockets.sk4)) {
- recv_sockets.sk4 = NULL;
+ struct sock *sk;
+ struct socket *sock;
+
+ sk = rxe_ns_pernet_sk4(net);
+ if (sk) {
+ sock_hold(sk);
+ return 0;
+ }
+
+ sock = rxe_setup_udp_tunnel(net, htons(ROCE_V2_UDP_DPORT), false);
+ if (IS_ERR(sock)) {
pr_err("Failed to create IPv4 UDP tunnel\n");
return -1;
}
+ rxe_ns_pernet_set_sk4(net, sock->sk);
return 0;
}
-static int rxe_net_ipv6_init(void)
+static int rxe_net_ipv6_init(struct net *net)
{
#if IS_ENABLED(CONFIG_IPV6)
+ struct sock *sk;
+ struct socket *sock;
- recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
- htons(ROCE_V2_UDP_DPORT), true);
- if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) {
- recv_sockets.sk6 = NULL;
+ sk = rxe_ns_pernet_sk6(net);
+ if (sk) {
+ sock_hold(sk);
+ return 0;
+ }
+
+ sock = rxe_setup_udp_tunnel(net, htons(ROCE_V2_UDP_DPORT), true);
+ if (PTR_ERR(sock) == -EAFNOSUPPORT) {
pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n");
return 0;
}
- if (IS_ERR(recv_sockets.sk6)) {
- recv_sockets.sk6 = NULL;
+ if (IS_ERR(sock)) {
pr_err("Failed to create IPv6 UDP tunnel\n");
return -1;
}
+
+ rxe_ns_pernet_set_sk6(net, sock->sk);
+
#endif
return 0;
}
+int rxe_register_notifier(void)
+{
+ int err;
+
+ err = register_netdevice_notifier(&rxe_net_notifier);
+ if (err) {
+ pr_err("Failed to register netdev notifier\n");
+ return -1;
+ }
+
+ return 0;
+}
+
void rxe_net_exit(void)
{
- rxe_release_udp_tunnel(recv_sockets.sk6);
- rxe_release_udp_tunnel(recv_sockets.sk4);
unregister_netdevice_notifier(&rxe_net_notifier);
}
-int rxe_net_init(void)
+int rxe_net_init(struct net_device *ndev)
{
+ struct net *net;
+ struct sock *sk;
int err;
- recv_sockets.sk6 = NULL;
+ net = dev_net(ndev);
- err = rxe_net_ipv4_init();
+ err = rxe_net_ipv4_init(net);
if (err)
return err;
- err = rxe_net_ipv6_init();
+
+ err = rxe_net_ipv6_init(net);
if (err)
goto err_out;
- err = register_netdevice_notifier(&rxe_net_notifier);
- if (err) {
- pr_err("Failed to register netdev notifier\n");
- goto err_out;
- }
+
return 0;
+
err_out:
- rxe_net_exit();
+ /* If ipv6 error, release ipv4 resource */
+ sk = rxe_ns_pernet_sk4(net);
+ if (sk)
+ rxe_sock_put(sk, rxe_ns_pernet_set_sk4, net);
+
return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 45d80d00f86b..56249677d692 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -11,14 +11,11 @@
#include <net/if_inet6.h>
#include <linux/module.h>
-struct rxe_recv_sockets {
- struct socket *sk4;
- struct socket *sk6;
-};
-
int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
+void rxe_net_del(struct ib_device *dev);
-int rxe_net_init(void);
+int rxe_register_notifier(void);
+int rxe_net_init(struct net_device *ndev);
void rxe_net_exit(void);
#endif /* RXE_NET_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_ns.c b/drivers/infiniband/sw/rxe/rxe_ns.c
new file mode 100644
index 000000000000..8b9d734229b2
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_ns.c
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+
+#include <net/sock.h>
+#include <net/netns/generic.h>
+#include <net/net_namespace.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/pid_namespace.h>
+#include <net/udp_tunnel.h>
+
+#include "rxe_ns.h"
+
+/*
+ * Per network namespace data
+ */
+struct rxe_ns_sock {
+ struct sock __rcu *rxe_sk4;
+ struct sock __rcu *rxe_sk6;
+};
+
+/*
+ * Index to store custom data for each network namespace.
+ */
+static unsigned int rxe_pernet_id;
+
+/*
+ * Called for every existing and added network namespaces
+ */
+static int rxe_ns_init(struct net *net)
+{
+ /* defer socket create in the namespace to the first
+ * device create.
+ */
+
+ return 0;
+}
+
+static void rxe_ns_exit(struct net *net)
+{
+ /* called when the network namespace is removed
+ */
+ struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
+ struct sock *sk;
+
+ rcu_read_lock();
+ sk = rcu_dereference(ns_sk->rxe_sk4);
+ rcu_read_unlock();
+ if (sk) {
+ rcu_assign_pointer(ns_sk->rxe_sk4, NULL);
+ udp_tunnel_sock_release(sk->sk_socket);
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ rcu_read_lock();
+ sk = rcu_dereference(ns_sk->rxe_sk6);
+ rcu_read_unlock();
+ if (sk) {
+ rcu_assign_pointer(ns_sk->rxe_sk6, NULL);
+ udp_tunnel_sock_release(sk->sk_socket);
+ }
+#endif
+}
+
+/*
+ * callback to make the module network namespace aware
+ */
+static struct pernet_operations rxe_net_ops = {
+ .init = rxe_ns_init,
+ .exit = rxe_ns_exit,
+ .id = &rxe_pernet_id,
+ .size = sizeof(struct rxe_ns_sock),
+};
+
+struct sock *rxe_ns_pernet_sk4(struct net *net)
+{
+ struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
+ struct sock *sk;
+
+ rcu_read_lock();
+ sk = rcu_dereference(ns_sk->rxe_sk4);
+ rcu_read_unlock();
+
+ return sk;
+}
+
+void rxe_ns_pernet_set_sk4(struct net *net, struct sock *sk)
+{
+ struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
+
+ rcu_assign_pointer(ns_sk->rxe_sk4, sk);
+ synchronize_rcu();
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+struct sock *rxe_ns_pernet_sk6(struct net *net)
+{
+ struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
+ struct sock *sk;
+
+ rcu_read_lock();
+ sk = rcu_dereference(ns_sk->rxe_sk6);
+ rcu_read_unlock();
+
+ return sk;
+}
+
+void rxe_ns_pernet_set_sk6(struct net *net, struct sock *sk)
+{
+ struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
+
+ rcu_assign_pointer(ns_sk->rxe_sk6, sk);
+ synchronize_rcu();
+}
+#endif /* IPV6 */
+
+int rxe_namespace_init(void)
+{
+ return register_pernet_subsys(&rxe_net_ops);
+}
+
+void rxe_namespace_exit(void)
+{
+ unregister_pernet_subsys(&rxe_net_ops);
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_ns.h b/drivers/infiniband/sw/rxe/rxe_ns.h
new file mode 100644
index 000000000000..4da2709e6b71
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_ns.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+
+#ifndef RXE_NS_H
+#define RXE_NS_H
+
+struct sock *rxe_ns_pernet_sk4(struct net *net);
+void rxe_ns_pernet_set_sk4(struct net *net, struct sock *sk);
+
+#if IS_ENABLED(CONFIG_IPV6)
+void rxe_ns_pernet_set_sk6(struct net *net, struct sock *sk);
+struct sock *rxe_ns_pernet_sk6(struct net *net);
+#else /* IPv6 */
+static inline struct sock *rxe_ns_pernet_sk6(struct net *net)
+{
+ return NULL;
+}
+
+static inline void rxe_ns_pernet_set_sk6(struct net *net, struct sock *sk)
+{
+}
+#endif /* IPv6 */
+
+int rxe_namespace_init(void);
+void rxe_namespace_exit(void);
+
+#endif /* RXE_NS_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
index bc11b1ec59ac..ff904d5e54a7 100644
--- a/drivers/infiniband/sw/rxe/rxe_odp.c
+++ b/drivers/infiniband/sw/rxe/rxe_odp.c
@@ -545,7 +545,7 @@ static int rxe_ib_advise_mr_prefetch(struct ib_pd *ibpd,
work->frags[i].mr = mr;
}
- queue_work(system_unbound_wq, &work->work);
+ queue_work(rxe_wq, &work->work);
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 5861e4244049..f79214738c2b 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -330,7 +330,8 @@ void rxe_rcv(struct sk_buff *skb)
pkt->qp = NULL;
pkt->mask |= rxe_opcode[pkt->opcode].mask;
- if (unlikely(skb->len < header_size(pkt)))
+ if (unlikely(pkt->paylen < header_size(pkt) + bth_pad(pkt) +
+ RXE_ICRC_SIZE))
goto drop;
err = hdr_check(pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 711f73e0bbb1..9faf8c09aa8e 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -37,6 +37,7 @@ static char *resp_state_name[] = {
[RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
[RESPST_ERR_RNR] = "ERR_RNR",
+ [RESPST_ERR_RKEY_VIOLATION_EVENT] = "ERR_RKEY_VIOLATION_EVENT",
[RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
[RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION",
[RESPST_ERR_LENGTH] = "ERR_LENGTH",
@@ -423,6 +424,19 @@ static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp->resp.resid = sizeof(u64);
}
+/* Transition to an rkey violation state. C9-222.1 requires an async event
+ * at the responder, but only if the error cannot be attached to an RX WQE.
+ * WRITE_WITH_IMM is the only op that might have that more precise RX WQE
+ * to pin the error on.
+ */
+static enum resp_states get_rkey_violation_state(struct rxe_pkt_info *pkt)
+{
+ if (pkt->mask & RXE_IMMDT_MASK)
+ return RESPST_ERR_RKEY_VIOLATION;
+
+ return RESPST_ERR_RKEY_VIOLATION_EVENT;
+}
+
/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
* if an invalid rkey is received or the rdma length is zero. For middle
* or last packets use the stored value of mr.
@@ -486,14 +500,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
mw = rxe_lookup_mw(qp, access, rkey);
if (!mw) {
rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
- state = RESPST_ERR_RKEY_VIOLATION;
+ state = get_rkey_violation_state(pkt);
goto err;
}
mr = mw->mr;
if (!mr) {
rxe_dbg_qp(qp, "MW doesn't have an MR\n");
- state = RESPST_ERR_RKEY_VIOLATION;
+ state = get_rkey_violation_state(pkt);
goto err;
}
@@ -507,7 +521,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
- state = RESPST_ERR_RKEY_VIOLATION;
+ state = get_rkey_violation_state(pkt);
goto err;
}
}
@@ -521,7 +535,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
}
if (mr_check_range(mr, va + qp->resp.offset, resid)) {
- state = RESPST_ERR_RKEY_VIOLATION;
+ state = get_rkey_violation_state(pkt);
goto err;
}
@@ -586,7 +600,7 @@ static enum resp_states write_data_in(struct rxe_qp *qp,
err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
if (err) {
- rc = RESPST_ERR_RKEY_VIOLATION;
+ rc = get_rkey_violation_state(pkt);
goto out;
}
@@ -667,7 +681,7 @@ static enum resp_states process_flush(struct rxe_qp *qp,
if (res->flush.type & IB_FLUSH_PERSISTENT) {
if (rxe_flush_pmem_iova(mr, start, length))
- return RESPST_ERR_RKEY_VIOLATION;
+ return get_rkey_violation_state(pkt);
/* Make data persistent. */
wmb();
} else if (res->flush.type & IB_FLUSH_GLOBAL) {
@@ -1383,6 +1397,20 @@ out:
return rc;
}
+static void do_qp_event(struct rxe_qp *qp, enum ib_event_type etype)
+{
+ struct ib_event event;
+ struct ib_qp *ibqp = &qp->ibqp;
+
+ event.event = etype;
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ if (ibqp->event_handler) {
+ rxe_dbg_qp(qp, "reporting QP event %d\n", etype);
+ ibqp->event_handler(&event, ibqp->qp_context);
+ }
+}
+
/* Process a class A or C. Both are treated the same in this implementation. */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
enum ib_wc_status status)
@@ -1476,14 +1504,9 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify)
int err;
if (qp->srq) {
- if (notify && qp->ibqp.event_handler) {
- struct ib_event ev;
+ if (notify && qp->ibqp.event_handler)
+ do_qp_event(qp, IB_EVENT_QP_LAST_WQE_REACHED);
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
return;
}
@@ -1613,6 +1636,13 @@ int rxe_receiver(struct rxe_qp *qp)
state = RESPST_CLEANUP;
break;
+ case RESPST_ERR_RKEY_VIOLATION_EVENT:
+ if (qp_type(qp) == IB_QPT_RC)
+ do_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
+
+ state = RESPST_ERR_RKEY_VIOLATION;
+ break;
+
case RESPST_ERR_RKEY_VIOLATION:
if (qp_type(qp) == IB_QPT_RC) {
/* Class C */
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index f522820b950c..801d06c969c9 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -6,7 +6,7 @@
#include "rxe.h"
-static struct workqueue_struct *rxe_wq;
+struct workqueue_struct *rxe_wq;
int rxe_alloc_wq(void)
{
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index fe41362c5144..4d4891dc2884 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -452,18 +452,9 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
int err;
if (udata) {
- if (udata->inlen < sizeof(cmd)) {
- err = -EINVAL;
- rxe_dbg_srq(srq, "malformed udata\n");
- goto err_out;
- }
-
- err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
- if (err) {
- err = -EFAULT;
- rxe_dbg_srq(srq, "unable to read udata\n");
+ err = ib_copy_validate_udata_in(udata, cmd, mmap_info_addr);
+ if (err)
goto err_out;
- }
}
err = rxe_srq_chk_attr(rxe, srq, attr, mask);
@@ -1097,11 +1088,8 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto err_out;
}
- err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
- if (err) {
- rxe_dbg_dev(rxe, "bad init attributes, err = %d\n", err);
- goto err_out;
- }
+ if (attr->cqe > rxe->attr.max_cqe)
+ return -EINVAL;
err = rxe_add_to_pool(&rxe->cq_pool, cq);
if (err) {
@@ -1127,7 +1115,8 @@ err_out:
return err;
}
-static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+static int rxe_resize_cq(struct ib_cq *ibcq, unsigned int cqe,
+ struct ib_udata *udata)
{
struct rxe_cq *cq = to_rcq(ibcq);
struct rxe_dev *rxe = to_rdev(ibcq->device);
@@ -1143,11 +1132,9 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
uresp = udata->outbuf;
}
- err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
- if (err) {
- rxe_dbg_cq(cq, "bad attr, err = %d\n", err);
- goto err_out;
- }
+ if (cqe > rxe->attr.max_cqe ||
+ cqe < queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT))
+ return -EINVAL;
err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
if (err) {
@@ -1519,7 +1506,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.reg_user_mr = rxe_reg_user_mr,
.req_notify_cq = rxe_req_notify_cq,
.rereg_user_mr = rxe_rereg_user_mr,
- .resize_cq = rxe_resize_cq,
+ .resize_user_cq = rxe_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index fb149f37e91d..d92f80d16f78 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -154,6 +154,7 @@ enum resp_states {
RESPST_ERR_MISSING_OPCODE_LAST_D1E,
RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
RESPST_ERR_RNR,
+ RESPST_ERR_RKEY_VIOLATION_EVENT,
RESPST_ERR_RKEY_VIOLATION,
RESPST_ERR_INVALIDATE_RKEY,
RESPST_ERR_LENGTH,
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index ef504db8f2b4..1e1d262a4ae2 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1373,11 +1373,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
struct siw_uresp_reg_mr uresp = {};
struct siw_mem *mem = mr->mem;
- if (udata->inlen < sizeof(ureq)) {
- rv = -EINVAL;
- goto err_out;
- }
- rv = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
+ rv = ib_copy_validate_udata_in(udata, ureq, pad);
if (rv)
goto err_out;