summary | refs | log | tree | commit | diff
path: root/include/rdma
diff options
context:
space:
mode:
Diffstat (limited to 'include/rdma')
-rw-r--r-- include/rdma/frmr_pools.h 39
-rw-r--r-- include/rdma/ib_cache.h 20
-rw-r--r-- include/rdma/ib_cm.h 19
-rw-r--r-- include/rdma/ib_mad.h 1
-rw-r--r-- include/rdma/ib_marshall.h 3
-rw-r--r-- include/rdma/ib_pack.h 3
-rw-r--r-- include/rdma/ib_sa.h 37
-rw-r--r-- include/rdma/ib_ucaps.h 30
-rw-r--r-- include/rdma/ib_umem.h 91
-rw-r--r-- include/rdma/ib_umem_odp.h 25
-rw-r--r-- include/rdma/ib_verbs.h 496
-rw-r--r-- include/rdma/iter.h 88
-rw-r--r-- include/rdma/iw_cm.h 14
-rw-r--r-- include/rdma/opa_port_info.h 8
-rw-r--r-- include/rdma/opa_vnic.h 96
-rw-r--r-- include/rdma/rdma_cm.h 39
-rw-r--r-- include/rdma/rdma_counter.h 7
-rw-r--r-- include/rdma/rdma_netlink.h 2
-rw-r--r-- include/rdma/rdma_vt.h 10
-rw-r--r-- include/rdma/rdmavt_qp.h 70
-rw-r--r-- include/rdma/restrack.h 8
-rw-r--r-- include/rdma/rw.h 22
-rw-r--r-- include/rdma/uverbs_ioctl.h 101
-rw-r--r-- include/rdma/uverbs_std_types.h 2
-rw-r--r-- include/rdma/uverbs_types.h 1
25 files changed, 834 insertions, 398 deletions
diff --git a/include/rdma/frmr_pools.h b/include/rdma/frmr_pools.h
new file mode 100644
index 000000000000..af1b88801fa4
--- /dev/null
+++ b/include/rdma/frmr_pools.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ *
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ */
+
+#ifndef FRMR_POOLS_H
+#define FRMR_POOLS_H
+
+#include <linux/types.h>
+#include <asm/page.h>
+
+struct ib_device;
+struct ib_mr;
+
+struct ib_frmr_key {
+ u64 vendor_key;
+ /* A pool with non-zero kernel_vendor_key is a kernel-only pool. */
+ u64 kernel_vendor_key;
+ size_t num_dma_blocks;
+ int access_flags;
+ u8 ats:1;
+};
+
+struct ib_frmr_pool_ops {
+ int (*create_frmrs)(struct ib_device *device, struct ib_frmr_key *key,
+ u32 *handles, u32 count);
+ void (*destroy_frmrs)(struct ib_device *device, u32 *handles,
+ u32 count);
+ int (*build_key)(struct ib_device *device, const struct ib_frmr_key *in,
+ struct ib_frmr_key *out);
+};
+
+int ib_frmr_pools_init(struct ib_device *device,
+ const struct ib_frmr_pool_ops *pool_ops);
+void ib_frmr_pools_cleanup(struct ib_device *device);
+int ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr);
+int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr);
+
+#endif /* FRMR_POOLS_H */
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 226ae3702d8a..eed46d966e40 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -34,7 +34,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
/**
* ib_get_cached_pkey - Returns a cached PKey table entry
- * @device: The device to query.
+ * @device_handle: The device to query.
* @port_num: The port number of the device to query.
* @index: The index into the cached PKey table to query.
* @pkey: The PKey value found at the specified index.
@@ -64,22 +64,6 @@ int ib_find_cached_pkey(struct ib_device *device,
u16 *index);
/**
- * ib_find_exact_cached_pkey - Returns the PKey table index where a specified
- * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit)
- * @device: The device to query.
- * @port_num: The port number of the device to search for the PKey.
- * @pkey: The PKey value to search for.
- * @index: The index into the cached PKey table where the PKey was found.
- *
- * ib_find_exact_cached_pkey() searches the specified PKey table in
- * the local software cache.
- */
-int ib_find_exact_cached_pkey(struct ib_device *device,
- u32 port_num,
- u16 pkey,
- u16 *index);
-
-/**
* ib_get_cached_lmc - Returns a cached lmc table entry
* @device: The device to query.
* @port_num: The port number of the device to query.
@@ -96,7 +80,7 @@ int ib_get_cached_lmc(struct ib_device *device,
* ib_get_cached_port_state - Returns a cached port state table entry
* @device: The device to query.
* @port_num: The port number of the device to query.
- * @port_state: port_state for the specified port for that device.
+ * @port_active: port_state for the specified port for that device.
*
* ib_get_cached_port_state() fetches the specified port_state table entry stored in
* the local software cache.
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index a2ac62b4a6cf..4808a355de41 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -271,7 +271,7 @@ struct ib_cm_event {
#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
/**
- * ib_cm_handler - User-defined callback to process communication events.
+ * typedef ib_cm_handler - User-defined callback to process communication events.
* @cm_id: Communication identifier associated with the reported event.
* @event: Information about the communication event.
*
@@ -480,23 +480,12 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
const void *private_data,
u8 private_data_len);
-#define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */
-
/**
- * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
- * message.
+ * ib_prepare_cm_mra - Prepares to send a message receipt acknowledgment to a
+ * connection message in case duplicates are received.
* @cm_id: Connection identifier associated with the connection message.
- * @service_timeout: The lower 5-bits specify the maximum time required for
- * the sender to reply to the connection message. The upper 3-bits
- * specify additional control flags.
- * @private_data: Optional user-defined private data sent with the
- * message receipt acknowledgement.
- * @private_data_len: Size of the private data buffer, in bytes.
*/
-int ib_send_cm_mra(struct ib_cm_id *cm_id,
- u8 service_timeout,
- const void *private_data,
- u8 private_data_len);
+int ib_prepare_cm_mra(struct ib_cm_id *cm_id);
/**
* ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 3f1b58d8b4bf..8bd0e1eb393b 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -48,6 +48,7 @@
#define IB_MGMT_METHOD_REPORT 0x06
#define IB_MGMT_METHOD_REPORT_RESP 0x86
#define IB_MGMT_METHOD_TRAP_REPRESS 0x07
+#define IB_MGMT_METHOD_GET_TABLE 0x12
#define IB_MGMT_METHOD_RESP 0x80
#define IB_BM_ATTR_MOD_RESP cpu_to_be32(1)
diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h
index 1838869aad28..b179e464e3d1 100644
--- a/include/rdma/ib_marshall.h
+++ b/include/rdma/ib_marshall.h
@@ -22,7 +22,4 @@ void ib_copy_ah_attr_to_user(struct ib_device *device,
void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
struct sa_path_rec *src);
-void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
- struct ib_user_path_rec *src);
-
#endif /* IB_USER_MARSHALL_H */
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index b8c56d7dc35d..8266fab826a7 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -283,7 +283,4 @@ int ib_ud_header_init(int payload_bytes,
int ib_ud_header_pack(struct ib_ud_header *header,
void *buf);
-int ib_ud_header_unpack(void *buf,
- struct ib_ud_header *header);
-
#endif /* IB_PACK_H */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index b46353fc53bf..95e8924ad563 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -189,6 +189,20 @@ struct sa_path_rec {
u32 flags;
};
+struct sa_service_rec {
+ __be64 id;
+ __u8 gid[16];
+ __be16 pkey;
+ __u8 reserved[2];
+ __be32 lease;
+ __u8 key[16];
+ __u8 name[64];
+ __u8 data_8[16];
+ __be16 data_16[8];
+ __be32 data_32[4];
+ __be64 data_64[2];
+};
+
static inline enum ib_gid_type
sa_conv_pathrec_to_gid_type(struct sa_path_rec *rec)
{
@@ -417,6 +431,17 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
unsigned int num_prs, void *context),
void *context, struct ib_sa_query **query);
+int ib_sa_service_rec_get(struct ib_sa_client *client,
+ struct ib_device *device, u32 port_num,
+ struct sa_service_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct sa_service_rec *resp,
+ unsigned int num_services,
+ void *context),
+ void *context, struct ib_sa_query **sa_query);
+
struct ib_sa_multicast {
struct ib_sa_mcmember_rec rec;
ib_sa_comp_mask comp_mask;
@@ -509,6 +534,18 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute);
/**
+ * ib_sa_pack_service - Convert a service record from struct sa_service_rec
+ * to IB MAD wire format.
+ */
+void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute);
+
+/**
+ * ib_sa_unpack_service - Convert a service record from MAD format to struct
+ * sa_service_rec.
+ */
+void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec);
+
+/**
* ib_sa_unpack_path - Convert a path record from MAD format to struct
* ib_sa_path_rec.
*/
diff --git a/include/rdma/ib_ucaps.h b/include/rdma/ib_ucaps.h
new file mode 100644
index 000000000000..d9f96be3a553
--- /dev/null
+++ b/include/rdma/ib_ucaps.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef _IB_UCAPS_H_
+#define _IB_UCAPS_H_
+
+#define UCAP_ENABLED(ucaps, type) (!!((ucaps) & (1U << (type))))
+
+enum rdma_user_cap {
+ RDMA_UCAP_MLX5_CTRL_LOCAL,
+ RDMA_UCAP_MLX5_CTRL_OTHER_VHCA,
+ RDMA_UCAP_MAX
+};
+
+void ib_cleanup_ucaps(void);
+int ib_get_ucaps(int *fds, int fd_count, uint64_t *idx_mask);
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int ib_create_ucap(enum rdma_user_cap type);
+void ib_remove_ucap(enum rdma_user_cap type);
+#else
+static inline int ib_create_ucap(enum rdma_user_cap type)
+{
+ return -EOPNOTSUPP;
+}
+static inline void ib_remove_ucap(enum rdma_user_cap type) {}
+#endif /* CONFIG_INFINIBAND_USER_ACCESS */
+
+#endif /* _IB_UCAPS_H_ */
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 7dc7b1cc71b5..2ad52cc1d52b 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -7,13 +7,9 @@
#ifndef IB_UMEM_H
#define IB_UMEM_H
-#include <linux/list.h>
#include <linux/scatterlist.h>
-#include <linux/workqueue.h>
-#include <rdma/ib_verbs.h>
-struct ib_ucontext;
-struct ib_umem_odp;
+struct ib_device;
struct dma_buf_attach_ops;
struct ib_umem {
@@ -22,6 +18,7 @@ struct ib_umem {
u64 iova;
size_t length;
unsigned long address;
+ unsigned long dma_attrs;
u32 writable : 1;
u32 is_odp : 1;
u32 is_dmabuf : 1;
@@ -36,6 +33,7 @@ struct ib_umem_dmabuf {
struct scatterlist *last_sg;
unsigned long first_sg_offset;
unsigned long last_sg_trim;
+ void (*pinned_revoke)(void *priv);
void *private;
u8 pinned : 1;
u8 revoked : 1;
@@ -52,11 +50,15 @@ static inline int ib_umem_offset(struct ib_umem *umem)
return umem->address & ~PAGE_MASK;
}
+static inline dma_addr_t ib_umem_start_dma_addr(struct ib_umem *umem)
+{
+ return sg_dma_address(umem->sgt_append.sgt.sgl) + ib_umem_offset(umem);
+}
+
static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem,
unsigned long pgsz)
{
- return (sg_dma_address(umem->sgt_append.sgt.sgl) + ib_umem_offset(umem)) &
- (pgsz - 1);
+ return ib_umem_start_dma_addr(umem) & (pgsz - 1);
}
static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
@@ -71,37 +73,6 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem)
{
return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
}
-
-static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
- struct ib_umem *umem,
- unsigned long pgsz)
-{
- __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl,
- umem->sgt_append.sgt.nents, pgsz);
- biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1);
- biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz);
-}
-
-static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter)
-{
- return __rdma_block_iter_next(biter) && biter->__sg_numblocks--;
-}
-
-/**
- * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem
- * @umem: umem to iterate over
- * @pgsz: Page size to split the list into
- *
- * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
- * returned DMA blocks will be aligned to pgsz and span the range:
- * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
- *
- * Performs exactly ib_umem_num_dma_blocks() iterations.
- */
-#define rdma_umem_for_each_dma_block(umem, biter, pgsz) \
- for (__rdma_umem_block_iter_start(biter, umem, pgsz); \
- __rdma_umem_block_iter_next(biter);)
-
#ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
@@ -117,7 +88,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
* ib_umem_find_best_pgoff - Find best HW page size
*
* @umem: umem struct
- * @pgsz_bitmap bitmap of HW supported page sizes
+ * @pgsz_bitmap: bitmap of HW supported page sizes
* @pgoff_bitmask: Mask of bits that can be represented with an offset
*
* This is very similar to ib_umem_find_best_pgsz() except instead of accepting
@@ -130,19 +101,35 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
*
* If the pgoff_bitmask requires either alignment in the low bit or an
* unavailable page size for the high bits, this function returns 0.
+ *
+ * Returns: best HW page size for the parameters or 0 if none available
+ * for the given parameters.
*/
static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
unsigned long pgsz_bitmap,
u64 pgoff_bitmask)
{
- struct scatterlist *sg = umem->sgt_append.sgt.sgl;
dma_addr_t dma_addr;
- dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK);
+ dma_addr = ib_umem_start_dma_addr(umem);
return ib_umem_find_best_pgsz(umem, pgsz_bitmap,
dma_addr & pgoff_bitmask);
}
+static inline bool ib_umem_is_contiguous(struct ib_umem *umem)
+{
+ dma_addr_t dma_addr;
+ unsigned long pgsz;
+
+ /*
+ * Select the smallest aligned page that can contain the whole umem if
+ * it was contiguous.
+ */
+ dma_addr = ib_umem_start_dma_addr(umem);
+ pgsz = roundup_pow_of_two((dma_addr ^ (umem->length - 1 + dma_addr)) + 1);
+ return !!ib_umem_find_best_pgoff(umem, pgsz, U64_MAX);
+}
+
struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
unsigned long offset, size_t size,
int fd, int access,
@@ -152,6 +139,12 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device,
size_t size, int fd,
int access);
struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_pinned_revocable_and_lock(struct ib_device *device,
+ unsigned long offset, size_t size,
+ int fd, int access);
+void ib_umem_dmabuf_set_revoke_locked(struct ib_umem_dmabuf *umem_dmabuf,
+ void (*revoke)(void *priv), void *priv);
+struct ib_umem_dmabuf *
ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
struct device *dma_device,
unsigned long offset, size_t size,
@@ -159,6 +152,8 @@ ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf);
void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf);
void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf);
+void ib_umem_dmabuf_revoke_lock(struct ib_umem_dmabuf *umem_dmabuf);
+void ib_umem_dmabuf_revoke_unlock(struct ib_umem_dmabuf *umem_dmabuf);
void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf);
#else /* CONFIG_INFINIBAND_USER_MEM */
@@ -205,6 +200,18 @@ ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset,
}
static inline struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_pinned_revocable_and_lock(struct ib_device *device,
+ unsigned long offset, size_t size,
+ int fd, int access)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+ib_umem_dmabuf_set_revoke_locked(struct ib_umem_dmabuf *umem_dmabuf,
+ void (*revoke)(void *priv), void *priv) {}
+
+static inline struct ib_umem_dmabuf *
ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
struct device *dma_device,
unsigned long offset, size_t size,
@@ -219,6 +226,8 @@ static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
}
static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { }
static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { }
+static inline void ib_umem_dmabuf_revoke_lock(struct ib_umem_dmabuf *umem_dmabuf) {}
+static inline void ib_umem_dmabuf_revoke_unlock(struct ib_umem_dmabuf *umem_dmabuf) {}
static inline void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf) {}
#endif /* CONFIG_INFINIBAND_USER_MEM */
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 0844c1d05ac6..2a24bf791c10 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -8,23 +8,17 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>
+#include <linux/hmm-dma.h>
struct ib_umem_odp {
struct ib_umem umem;
struct mmu_interval_notifier notifier;
struct pid *tgid;
- /* An array of the pfns included in the on-demand paging umem. */
- unsigned long *pfn_list;
+ struct hmm_dma_map map;
/*
- * An array with DMA addresses mapped for pfns in pfn_list.
- * The lower two bits designate access permissions.
- * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
- */
- dma_addr_t *dma_list;
- /*
- * The umem_mutex protects the page_list and dma_list fields of an ODP
+ * The umem_mutex protects the page_list field of an ODP
* umem, allowing only a single thread to map/unmap pages. The mutex
* also protects access to the mmu notifier counters.
*/
@@ -67,19 +61,6 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
umem_odp->page_shift;
}
-/*
- * The lower 2 bits of the DMA address signal the R/W permissions for
- * the entry. To upgrade the permissions, provide the appropriate
- * bitmask to the map_dma_pages function.
- *
- * Be aware that upgrading a mapped address might result in change of
- * the DMA address for the page.
- */
-#define ODP_READ_ALLOWED_BIT (1<<0ULL)
-#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
-
-#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
-
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_umem_odp *
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3417636da960..9dd76f489a0b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -15,6 +15,7 @@
#include <linux/ethtool.h>
#include <linux/types.h>
#include <linux/device.h>
+#include <linux/bvec.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -42,6 +43,9 @@
#include <rdma/signature.h>
#include <uapi/rdma/rdma_user_ioctl.h>
#include <uapi/rdma/ib_user_ioctl_verbs.h>
+#include <linux/pci-tph.h>
+#include <rdma/frmr_pools.h>
+#include <linux/dma-buf.h>
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
@@ -59,9 +63,6 @@ extern struct workqueue_struct *ib_comp_unbound_wq;
struct ib_ucq_object;
-__printf(3, 4) __cold
-void ibdev_printk(const char *level, const struct ib_device *ibdev,
- const char *format, ...);
__printf(2, 3) __cold
void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...);
__printf(2, 3) __cold
@@ -317,17 +318,19 @@ enum ib_atomic_cap {
};
enum ib_odp_general_cap_bits {
- IB_ODP_SUPPORT = 1 << 0,
- IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
+ IB_ODP_SUPPORT = IB_UVERBS_ODP_SUPPORT,
+ IB_ODP_SUPPORT_IMPLICIT = IB_UVERBS_ODP_SUPPORT_IMPLICIT,
};
enum ib_odp_transport_cap_bits {
- IB_ODP_SUPPORT_SEND = 1 << 0,
- IB_ODP_SUPPORT_RECV = 1 << 1,
- IB_ODP_SUPPORT_WRITE = 1 << 2,
- IB_ODP_SUPPORT_READ = 1 << 3,
- IB_ODP_SUPPORT_ATOMIC = 1 << 4,
- IB_ODP_SUPPORT_SRQ_RECV = 1 << 5,
+ IB_ODP_SUPPORT_SEND = IB_UVERBS_ODP_SUPPORT_SEND,
+ IB_ODP_SUPPORT_RECV = IB_UVERBS_ODP_SUPPORT_RECV,
+ IB_ODP_SUPPORT_WRITE = IB_UVERBS_ODP_SUPPORT_WRITE,
+ IB_ODP_SUPPORT_READ = IB_UVERBS_ODP_SUPPORT_READ,
+ IB_ODP_SUPPORT_ATOMIC = IB_UVERBS_ODP_SUPPORT_ATOMIC,
+ IB_ODP_SUPPORT_SRQ_RECV = IB_UVERBS_ODP_SUPPORT_SRQ_RECV,
+ IB_ODP_SUPPORT_FLUSH = IB_UVERBS_ODP_SUPPORT_FLUSH,
+ IB_ODP_SUPPORT_ATOMIC_WRITE = IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE,
};
struct ib_odp_caps {
@@ -522,6 +525,23 @@ enum ib_port_state {
IB_PORT_ACTIVE_DEFER = 5
};
+/* Returns the name of @state, or "UNKNOWN" for values outside the table. */
+static inline const char *__attribute_const__
+ib_port_state_to_str(enum ib_port_state state)
+{
+ /* static: the table is built once, not re-initialised on every call */
+ static const char * const states[] = {
+ [IB_PORT_NOP] = "NOP",
+ [IB_PORT_DOWN] = "DOWN",
+ [IB_PORT_INIT] = "INIT",
+ [IB_PORT_ARMED] = "ARMED",
+ [IB_PORT_ACTIVE] = "ACTIVE",
+ [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER",
+ };
+
+ return state < ARRAY_SIZE(states) ? states[state] : "UNKNOWN";
+}
+
enum ib_port_phys_state {
IB_PORT_PHYS_STATE_SLEEP = 1,
IB_PORT_PHYS_STATE_POLLING = 2,
@@ -569,10 +589,10 @@ enum ib_stat_flag {
};
/**
- * struct rdma_stat_desc
- * @name - The name of the counter
- * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
- * @priv - Driver private information; Core code should not use
+ * struct rdma_stat_desc - description of one rdma stat/counter
+ * @name: The name of the counter
+ * @flags: Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
+ * @priv: Driver private information; Core code should not use
*/
struct rdma_stat_desc {
const char *name;
@@ -581,24 +601,24 @@ struct rdma_stat_desc {
};
/**
- * struct rdma_hw_stats
- * @lock - Mutex to protect parallel write access to lifespan and values
+ * struct rdma_hw_stats - collection of hardware stats and their management
+ * @lock: Mutex to protect parallel write access to lifespan and values
* of counters, which are 64bits and not guaranteed to be written
* atomicaly on 32bits systems.
- * @timestamp - Used by the core code to track when the last update was
- * @lifespan - Used by the core code to determine how old the counters
+ * @timestamp: Used by the core code to track when the last update was
+ * @lifespan: Used by the core code to determine how old the counters
* should be before being updated again. Stored in jiffies, defaults
* to 10 milliseconds, drivers can override the default be specifying
* their own value during their allocation routine.
- * @descs - Array of pointers to static descriptors used for the counters
+ * @descs: Array of pointers to static descriptors used for the counters
* in directory.
- * @is_disabled - A bitmap to indicate each counter is currently disabled
+ * @is_disabled: A bitmap to indicate each counter is currently disabled
* or not.
- * @num_counters - How many hardware counters there are. If name is
+ * @num_counters: How many hardware counters there are. If name is
* shorter than this number, a kernel oops will result. Driver authors
* are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
* in their code to prevent this.
- * @value - Array of u64 counters that are accessed by the sysfs code and
+ * @value: Array of u64 counters that are accessed by the sysfs code and
* filled in by the drivers get_stats routine
*/
struct rdma_hw_stats {
@@ -747,6 +767,7 @@ enum ib_event_type {
IB_EVENT_CLIENT_REREGISTER,
IB_EVENT_GID_CHANGE,
IB_EVENT_WQ_FATAL,
+ IB_EVENT_DEVICE_SPEED_CHANGE,
};
const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
@@ -842,6 +863,7 @@ enum ib_rate {
IB_RATE_400_GBPS = 21,
IB_RATE_600_GBPS = 22,
IB_RATE_800_GBPS = 23,
+ IB_RATE_1600_GBPS = 25,
};
/**
@@ -859,6 +881,20 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
*/
__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
+struct ib_port_speed_info {
+ const char *str;
+ int rate; /* in deci-Gb/sec (100 Mb/s units) */
+};
+
+/**
+ * ib_port_attr_to_speed_info - Convert port attributes to speed information
+ * @attr: Port attributes containing active_speed and active_width
+ * @speed_info: Speed information to return
+ *
+ * Returns: 0 on success, -EINVAL on error.
+ */
+int ib_port_attr_to_speed_info(struct ib_port_attr *attr,
+ struct ib_port_speed_info *speed_info);
/**
* enum ib_mr_type - memory region type
@@ -1516,6 +1552,7 @@ struct ib_ucontext {
struct ib_uverbs_file *ufile;
struct ib_rdmacg_object cg_obj;
+ u64 enabled_caps;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -1540,6 +1577,93 @@ struct ib_uobject {
const struct uverbs_api_object *uapi_object;
};
+/**
+ * struct ib_udata - Driver request/response data from userspace
+ * @inbuf: Pointer to request data from userspace
+ * @outbuf: Pointer to response buffer in userspace
+ * @inlen: Length of request data
+ * @outlen: Length of response buffer
+ *
+ * struct ib_udata is used to hold the driver data request and response
+ * structures defined in the uapi. They follow these rules for forwards and
+ * backwards compatibility:
+ *
+ * 1) Userspace can provide a longer request so long as the trailing part the
+ * kernel doesn't understand is all zeros.
+ *
+ * This provides a degree of safety if userspace wrongly tries to use a new
+ * feature the kernel does not understand with some non-zero value.
+ *
+ * It allows a simpler rdma-core implementation because the library can
+ * simply always use the latest structs for the request, even if they are
+ * bigger. It simply has to avoid using the new members if they are not
+ * supported/required.
+ *
+ * 2) Userspace can provide a shorter request; the kernel will zero-pad it out
+ * to fill the storage. The newer kernel should understand that older
+ * userspace will provide 0 to new fields. The kernel has three options to
+ * enable new request fields:
+ *
+ * - Input comp_mask that says the field is supported
+ * - Look for non-zero values
+ * - Check if the udata->inlen size covers the field
+ *
+ * This also corrects any bugs related to not filling in request structures
+ * as the new helper always fully writes to the struct.
+ *
+ * 3) Userspace can provide a shorter or longer response struct. If shorter,
+ * the kernel reply is truncated. The kernel should be designed to not write
+ * to new reply fields unless userspace has affirmatively requested them.
+ *
+ * If the user buffer is longer, the kernel will zero-fill it.
+ *
+ * Userspace has three options to enable new response fields:
+ *
+ * - Output comp_mask that says the field is supported
+ * - Look for non-zero values
+ * - Infer the output must be valid because the request contents demand it
+ * and old kernels will fail the request
+ *
+ * The following helper functions implement these semantics:
+ *
+ * ib_copy_validate_udata_in() - Checks the minimum length and that any trailing bytes are zero::
+ *
+ * struct driver_create_cq_req req;
+ * int err;
+ *
+ * err = ib_copy_validate_udata_in(udata, req, end_member);
+ * if (err)
+ * return err;
+ *
+ * The third argument specifies the last member of the struct in the first
+ * kernel version that introduced it, establishing the minimum required size.
+ *
+ * ib_copy_validate_udata_in_cm() - The above but also validate a
+ * comp_mask member only has supported bits set::
+ *
+ * err = ib_copy_validate_udata_in_cm(udata, req, first_version_last_member,
+ * DRIVER_CREATE_CQ_MASK_FEATURE_A |
+ * DRIVER_CREATE_CQ_MASK_FEATURE_B);
+ *
+ * ib_respond_udata() - Implements the response rules::
+ *
+ * struct driver_create_cq_resp resp = {};
+ *
+ * resp.some_field = value;
+ * return ib_respond_udata(udata, resp);
+ *
+ * ib_is_udata_in_empty() - Used instead of ib_copy_validate_udata_in() if the
+ * driver does not have a request structure::
+ *
+ * ret = ib_is_udata_in_empty(udata);
+ * if (ret)
+ * return ret;
+ *
+ * Similarly ib_respond_empty_udata() is used instead of ib_respond_udata() if
+ * the driver does not have a response structure::
+ *
+ * return ib_respond_empty_udata(udata);
+ */
struct ib_udata {
const void __user *inbuf;
void __user *outbuf;
@@ -1614,6 +1738,7 @@ struct ib_cq {
u8 interrupt:1;
u8 shared:1;
unsigned int comp_vector;
+ struct ib_umem *umem;
/*
* Implementation details of the RDMA core, don't use in drivers:
@@ -1829,6 +1954,27 @@ struct ib_dm {
atomic_t usecnt;
};
+/* bit values to mark existence of ib_dmah fields */
+enum {
+ IB_DMAH_CPU_ID_EXISTS,
+ IB_DMAH_MEM_TYPE_EXISTS,
+ IB_DMAH_PH_EXISTS,
+};
+
+struct ib_dmah {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
+ u32 cpu_id;
+ enum tph_mem_type mem_type;
+ atomic_t usecnt;
+ u8 ph;
+ u8 valid_fields; /* use IB_DMAH_XXX_EXISTS */
+};
+
struct ib_mr {
struct ib_device *device;
struct ib_pd *pd;
@@ -1846,6 +1992,12 @@ struct ib_mr {
struct ib_dm *dm;
struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
+ struct ib_dmah *dmah;
+ struct {
+ struct ib_frmr_pool *pool;
+ struct ib_frmr_key key;
+ u32 handle;
+ } frmr;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -2177,6 +2329,7 @@ struct ib_port_cache {
struct ib_gid_table *gid;
u8 lmc;
enum ib_port_state port_state;
+ enum ib_port_state last_port_state;
};
struct ib_port_immutable {
@@ -2208,7 +2361,6 @@ struct ib_port_data {
/* rdma netdev type - specifies protocol type */
enum rdma_netdev_t {
- RDMA_NETDEV_OPA_VNIC,
RDMA_NETDEV_IPOIB,
};
@@ -2222,11 +2374,6 @@ struct rdma_netdev {
u32 port_num;
int mtu;
- /*
- * cleanup function must be specified.
- * FIXME: This is only used for OPA_VNIC and that usage should be
- * removed too.
- */
void (*free_rdma_netdev)(struct net_device *netdev);
/* control functions */
@@ -2256,7 +2403,9 @@ struct rdma_netdev_alloc_params {
struct ib_odp_counters {
atomic64_t faults;
+ atomic64_t faults_handled;
atomic64_t invalidations;
+ atomic64_t invalidations_handled;
atomic64_t prefetch;
};
@@ -2304,6 +2453,9 @@ struct rdma_user_mmap_entry {
unsigned long start_pgoff;
size_t npages;
bool driver_removed;
+ /* protects access to dmabufs */
+ struct mutex dmabufs_lock;
+ struct list_head dmabufs;
};
/* Return the offset (in bytes) the user should pass to libc's mmap() */
@@ -2323,6 +2475,12 @@ struct ib_device_ops {
enum rdma_driver_id driver_id;
u32 uverbs_abi_ver;
unsigned int uverbs_no_driver_id_binding:1;
+ /*
+ * Indicates the driver checks every op accepting a udata for the
+ * correct size on input and always handles the output using the udata
+ * helpers.
+ */
+ unsigned int uverbs_robust_udata:1;
/*
* NOTE: New drivers should not make use of device_group; instead new
@@ -2355,14 +2513,14 @@ struct ib_device_ops {
int (*modify_device)(struct ib_device *device, int device_modify_mask,
struct ib_device_modify *device_modify);
void (*get_dev_fw_str)(struct ib_device *device, char *str);
- const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
- int comp_vector);
int (*query_port)(struct ib_device *device, u32 port_num,
struct ib_port_attr *port_attr);
+ int (*query_port_speed)(struct ib_device *device, u32 port_num,
+ u64 *speed);
int (*modify_port)(struct ib_device *device, u32 port_num,
int port_modify_mask,
struct ib_port_modify *port_modify);
- /**
+ /*
* The following mandatory functions are used only at device
* registration. Keep functions such as these at the end of this
* structure to avoid cache line misses when accessing struct ib_device
@@ -2372,7 +2530,7 @@ struct ib_device_ops {
struct ib_port_immutable *immutable);
enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
u32 port_num);
- /**
+ /*
* When calling get_netdev, the HW vendor's driver should return the
* net device of device @device at port @port_num or NULL if such
* a net device doesn't exist. The vendor driver should call dev_hold
@@ -2382,7 +2540,7 @@ struct ib_device_ops {
*/
struct net_device *(*get_netdev)(struct ib_device *device,
u32 port_num);
- /**
+ /*
* rdma netdev operation
*
* Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
@@ -2396,14 +2554,14 @@ struct ib_device_ops {
int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params);
- /**
+ /*
* query_gid should be return GID value for @device, when @port_num
* link layer is either IB or iWarp. It is no-op if @port_num port
* is RoCE link layer.
*/
int (*query_gid)(struct ib_device *device, u32 port_num, int index,
union ib_gid *gid);
- /**
+ /*
* When calling add_gid, the HW vendor's driver should add the gid
* of device of port at gid index available at @attr. Meta-info of
* that gid (for example, the network device related to this gid) is
@@ -2417,7 +2575,7 @@ struct ib_device_ops {
* roce_gid_table is used.
*/
int (*add_gid)(const struct ib_gid_attr *attr, void **context);
- /**
+ /*
* When calling del_gid, the HW vendor's driver should delete the
* gid of device @device at gid index gid_index of port port_num
* available in @attr.
@@ -2432,13 +2590,18 @@ struct ib_device_ops {
struct ib_udata *udata);
void (*dealloc_ucontext)(struct ib_ucontext *context);
int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
- /**
+ /*
* This will be called once refcount of an entry in mmap_xa reaches
* zero. The type of the memory that was mapped may differ between
* entries and is opaque to the rdma_user_mmap interface.
* Therefore needs to be implemented by the driver in mmap_free.
*/
void (*mmap_free)(struct rdma_user_mmap_entry *entry);
+ int (*mmap_get_pfns)(struct rdma_user_mmap_entry *entry,
+ struct phys_vec *phys_vec,
+ struct p2pdma_provider **provider);
+ struct rdma_user_mmap_entry *(*pgoff_to_mmap_entry)(struct ib_ucontext *ucontext,
+ off_t pg_off);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
@@ -2466,16 +2629,31 @@ struct ib_device_ops {
int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs);
+ int (*create_user_cq)(struct ib_cq *cq,
+ const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
- int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+ int (*resize_user_cq)(struct ib_cq *cq, unsigned int cqe,
+ struct ib_udata *udata);
+ /*
+ * pre_destroy_cq - Prevent a cq from generating any new work
+ * completions, but do not free any kernel resources
+ */
+ int (*pre_destroy_cq)(struct ib_cq *cq);
+ /*
+ * post_destroy_cq - Free all kernel resources
+ */
+ void (*post_destroy_cq)(struct ib_cq *cq);
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset,
u64 length, u64 virt_addr, int fd,
int mr_access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs);
struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start,
u64 length, u64 virt_addr,
@@ -2540,6 +2718,9 @@ struct ib_device_ops {
struct ib_dm_alloc_attr *attr,
struct uverbs_attr_bundle *attrs);
int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
+ int (*alloc_dmah)(struct ib_dmah *ibdmah,
+ struct uverbs_attr_bundle *attrs);
+ int (*dealloc_dmah)(struct ib_dmah *dmah, struct uverbs_attr_bundle *attrs);
struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs);
@@ -2554,7 +2735,7 @@ struct ib_device_ops {
struct scatterlist *meta_sg, int meta_sg_nents,
unsigned int *meta_sg_offset);
- /**
+ /*
* alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and
* fill in the driver initialized data. The struct is kfree()'ed by
* the sysfs core when the device is removed. A lifespan of -1 in the
@@ -2563,7 +2744,7 @@ struct ib_device_ops {
struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device);
struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device,
u32 port_num);
- /**
+ /*
* get_hw_stats - Fill in the counter value(s) in the stats struct.
* @index - The index in the value array we wish to have updated, or
* num_counters if we want all stats updated
@@ -2578,14 +2759,14 @@ struct ib_device_ops {
int (*get_hw_stats)(struct ib_device *device,
struct rdma_hw_stats *stats, u32 port, int index);
- /**
+ /*
* modify_hw_stat - Modify the counter configuration
* @enable: true/false when enable/disable a counter
* Return codes - 0 on success or error code otherwise.
*/
int (*modify_hw_stat)(struct ib_device *device, u32 port,
unsigned int counter_index, bool enable);
- /**
+ /*
* Allows rdma drivers to add their own restrack attributes.
*/
int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
@@ -2621,33 +2802,39 @@ struct ib_device_ops {
u8 pdata_len);
int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
- /**
+ /*
* counter_bind_qp - Bind a QP to a counter.
* @counter - The counter to be bound. If counter->id is zero then
* the driver needs to allocate a new counter and set counter->id
*/
- int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
- /**
+ int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp,
+ u32 port);
+ /*
* counter_unbind_qp - Unbind the qp from the dynamically-allocated
* counter and bind it onto the default one
*/
- int (*counter_unbind_qp)(struct ib_qp *qp);
- /**
+ int (*counter_unbind_qp)(struct ib_qp *qp, u32 port);
+ /*
* counter_dealloc -De-allocate the hw counter
*/
int (*counter_dealloc)(struct rdma_counter *counter);
- /**
+ /*
* counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
* the driver initialized data.
*/
struct rdma_hw_stats *(*counter_alloc_stats)(
struct rdma_counter *counter);
- /**
+ /*
* counter_update_stats - Query the stats value of this counter
*/
int (*counter_update_stats)(struct rdma_counter *counter);
- /**
+ /*
+ * counter_init - Initialize the driver specific rdma counter struct.
+ */
+ void (*counter_init)(struct rdma_counter *counter);
+
+ /*
* Allows rdma drivers to add their own restrack attributes
* dumped via 'rdma stat' iproute2 command.
*/
@@ -2663,27 +2850,35 @@ struct ib_device_ops {
*/
int (*get_numa_node)(struct ib_device *dev);
- /**
+ /*
* add_sub_dev - Add a sub IB device
*/
struct ib_device *(*add_sub_dev)(struct ib_device *parent,
enum rdma_nl_dev_type type,
const char *name);
- /**
+ /*
* del_sub_dev - Delete a sub IB device
*/
void (*del_sub_dev)(struct ib_device *sub_dev);
- /**
+ /*
* ufile_cleanup - Attempt to cleanup ubojects HW resources inside
* the ufile.
*/
void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile);
+ /*
+ * report_port_event - Drivers need to implement this if they have
+ * some private stuff to handle when link status changes.
+ */
+ void (*report_port_event)(struct ib_device *ibdev,
+ struct net_device *ndev, unsigned long event);
+
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_counters);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_dmah);
DECLARE_RDMA_OBJ_SIZE(ib_mw);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
DECLARE_RDMA_OBJ_SIZE(ib_qp);
@@ -2691,6 +2886,7 @@ struct ib_device_ops {
DECLARE_RDMA_OBJ_SIZE(ib_srq);
DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
+ DECLARE_RDMA_OBJ_SIZE(rdma_counter);
};
struct ib_core_device {
@@ -2743,6 +2939,7 @@ struct ib_device {
* It is a NULL terminated array.
*/
const struct attribute_group *groups[4];
+ u8 hw_stats_attr_index;
u64 uverbs_cmd_mask;
@@ -2802,6 +2999,8 @@ struct ib_device {
struct list_head subdev_list;
enum rdma_nl_name_assign_type name_assign_type;
+
+ struct ib_frmr_pools *frmr_pools;
};
static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size,
@@ -2854,27 +3053,18 @@ struct ib_client {
u8 no_kverbs_req:1;
};
-/*
- * IB block DMA iterator
- *
- * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
- * to a HW supported page size.
- */
-struct ib_block_iter {
- /* internal states */
- struct scatterlist *__sg; /* sg holding the current aligned block */
- dma_addr_t __dma_addr; /* unaligned DMA address of this block */
- size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */
- unsigned int __sg_nents; /* number of SG entries */
- unsigned int __sg_advance; /* number of bytes to advance in sg in next step */
- unsigned int __pg_bit; /* alignment of current block */
-};
-
-struct ib_device *_ib_alloc_device(size_t size);
+struct ib_device *_ib_alloc_device(size_t size, struct net *net);
#define ib_alloc_device(drv_struct, member) \
container_of(_ib_alloc_device(sizeof(struct drv_struct) + \
BUILD_BUG_ON_ZERO(offsetof( \
- struct drv_struct, member))), \
+ struct drv_struct, member)), \
+ &init_net), \
+ struct drv_struct, member)
+
+#define ib_alloc_device_with_net(drv_struct, member, net) \
+ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof( \
+ struct drv_struct, member)), net), \
struct drv_struct, member)
void ib_dealloc_device(struct ib_device *device);
@@ -2891,38 +3081,6 @@ void ib_unregister_device_queued(struct ib_device *ib_dev);
int ib_register_client (struct ib_client *client);
void ib_unregister_client(struct ib_client *client);
-void __rdma_block_iter_start(struct ib_block_iter *biter,
- struct scatterlist *sglist,
- unsigned int nents,
- unsigned long pgsz);
-bool __rdma_block_iter_next(struct ib_block_iter *biter);
-
-/**
- * rdma_block_iter_dma_address - get the aligned dma address of the current
- * block held by the block iterator.
- * @biter: block iterator holding the memory block
- */
-static inline dma_addr_t
-rdma_block_iter_dma_address(struct ib_block_iter *biter)
-{
- return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
-}
-
-/**
- * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
- * @sglist: sglist to iterate over
- * @biter: block iterator holding the memory block
- * @nents: maximum number of sg entries to iterate over
- * @pgsz: best HW supported page size to use
- *
- * Callers may use rdma_block_iter_dma_address() to get each
- * blocks aligned DMA address.
- */
-#define rdma_for_each_block(sglist, biter, nents, pgsz) \
- for (__rdma_block_iter_start(biter, sglist, nents, \
- pgsz); \
- __rdma_block_iter_next(biter);)
-
/**
* ib_get_client_data - Get IB client context
* @device:Device to get context for
@@ -3073,8 +3231,8 @@ static inline u32 rdma_start_port(const struct ib_device *device)
/**
* rdma_for_each_port - Iterate over all valid port numbers of the IB device
- * @device - The struct ib_device * to iterate over
- * @iter - The unsigned int to store the port number
+ * @device: The struct ib_device * to iterate over
+ * @iter: The unsigned int to store the port number
*/
#define rdma_for_each_port(device, iter) \
for (iter = rdma_start_port(device + \
@@ -3440,7 +3598,7 @@ static inline bool rdma_core_cap_opa_port(struct ib_device *device,
/**
* rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
* @device: Device
- * @port_num: Port number
+ * @port: Port number
* @mtu: enum value of MTU
*
* Return the MTU size supported by the port as an integer value. Will return
@@ -3458,7 +3616,7 @@ static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port,
/**
* rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
* @device: Device
- * @port_num: Port number
+ * @port: Port number
* @attr: port attribute
*
* Return the MTU size supported by the port as an integer value.
@@ -3835,7 +3993,7 @@ static inline int ib_destroy_qp(struct ib_qp *qp)
/**
* ib_open_qp - Obtain a reference to an existing sharable QP.
- * @xrcd - XRC domain
+ * @xrcd: XRC domain
* @qp_open_attr: Attributes identifying the QP to open.
*
* Returns a reference to a sharable QP.
@@ -3947,15 +4105,6 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
__ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), KBUILD_MODNAME)
/**
- * ib_resize_cq - Modifies the capacity of the CQ.
- * @cq: The CQ to resize.
- * @cqe: The minimum size of the CQ.
- *
- * Users can examine the cq structure to determine the actual CQ size.
- */
-int ib_resize_cq(struct ib_cq *cq, int cqe);
-
-/**
* rdma_set_cq_moderation - Modifies moderation params of the CQ
* @cq: The CQ to modify.
* @cq_count: number of CQEs that will trigger an event
@@ -4164,6 +4313,47 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
dma_unmap_page(dev->dma_device, addr, size, direction);
}
+/**
+ * ib_dma_map_bvec - Map a bio_vec to DMA address
+ * @dev: The device for which the dma_addr is to be created
+ * @bvec: The bio_vec to map
+ * @direction: The direction of the DMA
+ *
+ * Returns a DMA address for the bio_vec. The caller must check the
+ * result with ib_dma_mapping_error() before use; a failed mapping
+ * must not be passed to ib_dma_unmap_bvec().
+ *
+ * For software RDMA devices (rxe, siw), returns a virtual address
+ * and no actual DMA mapping occurs.
+ */
+static inline u64 ib_dma_map_bvec(struct ib_device *dev,
+ struct bio_vec *bvec,
+ enum dma_data_direction direction)
+{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)bvec_virt(bvec);
+ return dma_map_phys(dev->dma_device, bvec_phys(bvec),
+ bvec->bv_len, direction, 0);
+}
+
+/**
+ * ib_dma_unmap_bvec - Unmap a bio_vec DMA mapping
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address returned by ib_dma_map_bvec()
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ *
+ * Releases a DMA mapping created by ib_dma_map_bvec(). For software
+ * RDMA devices this is a no-op since no actual mapping occurred.
+ */
+static inline void ib_dma_unmap_bvec(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_phys(dev->dma_device, addr, size, direction, 0);
+}
+
int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
struct scatterlist *sg, int nents,
@@ -4189,9 +4379,9 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
/**
* ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
* @dev: The device for which the DMA addresses are to be created
- * @sg: The sg_table object describing the buffer
+ * @sgt: The sg_table object describing the buffer
* @direction: The direction of the DMA
- * @attrs: Optional DMA attributes for the map operation
+ * @dma_attrs: Optional DMA attributes for the map operation
*/
static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
struct sg_table *sgt,
@@ -4335,8 +4525,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
* R_Key and L_Key.
- * @mr - struct ib_mr pointer to be updated.
- * @newkey - new key to be used.
+ * @mr: struct ib_mr pointer to be updated.
+ * @newkey: new key to be used.
*/
static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
{
@@ -4347,7 +4537,7 @@ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
/**
* ib_inc_rkey - increments the key portion of the given rkey. Can be used
* for calculating a new rkey for type 2 memory windows.
- * @rkey - the rkey to increment.
+ * @rkey: the rkey to increment.
*/
static inline u32 ib_inc_rkey(u32 rkey)
{
@@ -4441,7 +4631,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
/**
* ib_device_try_get: Hold a registration lock
- * device: The device to lock
+ * @dev: The device to lock
*
* A device under an active registration lock cannot become unregistered. It
* is only possible to obtain a registration lock on a device that is fully
@@ -4460,8 +4650,6 @@ static inline bool ib_device_try_get(struct ib_device *dev)
void ib_device_put(struct ib_device *device);
struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
enum rdma_driver_id driver_id);
-struct ib_device *ib_device_get_by_name(const char *name,
- enum rdma_driver_id driver_id);
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port,
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);
@@ -4469,6 +4657,17 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
unsigned int port);
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
u32 port);
+int ib_query_netdev_port(struct ib_device *ibdev, struct net_device *ndev,
+ u32 *port);
+
+static inline enum ib_port_state ib_get_curr_port_state(struct net_device *net_dev)
+{
+ return (netif_running(net_dev) && netif_carrier_ok(net_dev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+}
+
+void ib_dispatch_port_state_event(struct ib_device *ibdev,
+ struct net_device *ndev);
struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr);
int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
@@ -4713,31 +4912,10 @@ static inline __be16 ib_lid_be16(u32 lid)
}
/**
- * ib_get_vector_affinity - Get the affinity mappings of a given completion
- * vector
- * @device: the rdma device
- * @comp_vector: index of completion vector
- *
- * Returns NULL on failure, otherwise a corresponding cpu map of the
- * completion vector (returns all-cpus map if the device driver doesn't
- * implement get_vector_affinity).
- */
-static inline const struct cpumask *
-ib_get_vector_affinity(struct ib_device *device, int comp_vector)
-{
- if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
- !device->ops.get_vector_affinity)
- return NULL;
-
- return device->ops.get_vector_affinity(device, comp_vector);
-
-}
-
-/**
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
*
- * @device: the rdma device
+ * @ibdev: the rdma device
*/
void rdma_roce_rescan_device(struct ib_device *ibdev);
void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port);
@@ -4746,7 +4924,20 @@ void roce_del_all_netdev_gids(struct ib_device *ib_dev,
struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
+bool rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs);
+#else
+static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
+{
+ return 0;
+}
+static inline bool
+rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs)
+{
+ return false;
+}
+#endif
struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type, const char *name,
@@ -4777,7 +4968,7 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
/**
* ibdev_to_node - return the NUMA node for a given ib_device
- * @dev: device to get the NUMA node for.
+ * @ibdev: device to get the NUMA node for.
*/
static inline int ibdev_to_node(struct ib_device *ibdev)
{
@@ -4802,6 +4993,12 @@ static inline int ibdev_to_node(struct ib_device *ibdev)
bool rdma_dev_access_netns(const struct ib_device *device,
const struct net *net);
+bool rdma_dev_has_raw_cap(const struct ib_device *dev);
+static inline struct net *rdma_dev_net(struct ib_device *device)
+{
+ return read_pnet(&device->coredev.rdma_net);
+}
+
#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
@@ -4809,6 +5006,7 @@ bool rdma_dev_access_netns(const struct ib_device *device,
/**
* rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
* on the flow_label
+ * @fl: flow_label value
*
* This function will convert the 20 bit flow_label input to a valid RoCE v2
* UDP src port 14 bit value. All RoCE V2 drivers should use this same
diff --git a/include/rdma/iter.h b/include/rdma/iter.h
new file mode 100644
index 000000000000..19d64ef04ba9
--- /dev/null
+++ b/include/rdma/iter.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef _RDMA_ITER_H_
+#define _RDMA_ITER_H_
+
+#include <linux/scatterlist.h>
+#include <rdma/ib_umem.h>
+
+/*
+ * IB block DMA iterator
+ *
+ * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
+ * to a HW supported page size.
+ */
+struct ib_block_iter {
+ /* internal states */
+ struct scatterlist *__sg; /* sg holding the current aligned block */
+ dma_addr_t __dma_addr; /* unaligned DMA address of this block */
+ size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */
+ unsigned int __sg_nents; /* number of SG entries */
+ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */
+ unsigned int __pg_bit; /* alignment of current block */
+};
+
+void __rdma_block_iter_start(struct ib_block_iter *biter,
+ struct scatterlist *sglist,
+ unsigned int nents,
+ unsigned long pgsz);
+bool __rdma_block_iter_next(struct ib_block_iter *biter);
+
+/**
+ * rdma_block_iter_dma_address - get the aligned dma address of the current
+ * block held by the block iterator.
+ * @biter: block iterator holding the memory block
+ */
+static inline dma_addr_t
+rdma_block_iter_dma_address(struct ib_block_iter *biter)
+{
+ return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
+}
+
+/**
+ * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
+ * @sglist: sglist to iterate over
+ * @biter: block iterator holding the memory block
+ * @nents: maximum number of sg entries to iterate over
+ * @pgsz: best HW supported page size to use
+ *
+ * Callers may use rdma_block_iter_dma_address() to get each
+ * block's aligned DMA address.
+ */
+#define rdma_for_each_block(sglist, biter, nents, pgsz) \
+ for (__rdma_block_iter_start(biter, sglist, nents, \
+ pgsz); \
+ __rdma_block_iter_next(biter);)
+
+static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
+ struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl,
+ umem->sgt_append.sgt.nents, pgsz);
+ biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1);
+ biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz);
+}
+
+static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter)
+{
+ return __rdma_block_iter_next(biter) && biter->__sg_numblocks--;
+}
+
+/**
+ * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem
+ * @umem: umem to iterate over
+ * @pgsz: Page size to split the list into
+ *
+ * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
+ * returned DMA blocks will be aligned to pgsz and span the range:
+ * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
+ *
+ * Performs exactly ib_umem_num_dma_blocks() iterations.
+ */
+#define rdma_umem_for_each_dma_block(umem, biter, pgsz) \
+ for (__rdma_umem_block_iter_start(biter, umem, pgsz); \
+ __rdma_umem_block_iter_next(biter);)
+
+#endif /* _RDMA_ITER_H_ */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 2b22f153ef63..57b33edd9ce7 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -33,8 +33,8 @@ struct iw_cm_event {
};
/**
- * iw_cm_handler - Function to be called by the IW CM when delivering events
- * to the client.
+ * typedef iw_cm_handler - Function to be called by the IW CM when delivering
+ * events to the client.
*
* @cm_id: The IW CM identifier associated with the event.
* @event: Pointer to the event structure.
@@ -43,9 +43,9 @@ typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id,
struct iw_cm_event *event);
/**
- * iw_event_handler - Function called by the provider when delivering provider
- * events to the IW CM. Returns either 0 indicating the event was processed
- * or -errno if the event could not be processed.
+ * typedef iw_event_handler - Function called by the provider when delivering
+ * provider events to the IW CM. Returns either 0 indicating the event was
+ * processed or -errno if the event could not be processed.
*
* @cm_id: The IW CM identifier associated with the event.
* @event: Pointer to the event structure.
@@ -97,7 +97,7 @@ enum iw_flags {
* iw_create_cm_id - Create an IW CM identifier.
*
* @device: The IB device on which to create the IW CM identier.
- * @event_handler: User callback invoked to report events associated with the
+ * @cm_handler: User callback invoked to report events associated with the
* returned IW CM identifier.
* @context: User specified context associated with the id.
*/
@@ -147,7 +147,7 @@ int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
* iw_cm_reject - Reject an incoming connection request.
*
* @cm_id: Connection identifier associated with the request.
- * @private_daa: Pointer to data to deliver to the remote peer as part of the
+ * @private_data: Pointer to data to deliver to the remote peer as part of the
* reject message.
* @private_data_len: The number of bytes in the private_data parameter.
*
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index 73bcac90a048..fb66d3a1dfa9 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -93,9 +93,11 @@
#define OPA_LINKINIT_QUARANTINED (9 << 4)
#define OPA_LINKINIT_INSUFIC_CAPABILITY (10 << 4)
-#define OPA_LINK_SPEED_NOP 0x0000 /* Reserved (1-5 Gbps) */
-#define OPA_LINK_SPEED_12_5G 0x0001 /* 12.5 Gbps */
-#define OPA_LINK_SPEED_25G 0x0002 /* 25.78125? Gbps (EDR) */
+#define OPA_LINK_SPEED_NOP 0x0000 /* no change */
+#define OPA_LINK_SPEED_12_5G 0x0001 /* 12.5 Gbps */
+#define OPA_LINK_SPEED_25G 0x0002 /* 25.78125 Gbps */
+#define OPA_LINK_SPEED_50G 0x0004 /* 53.125 Gbps */
+#define OPA_LINK_SPEED_100G 0x0008 /* 106.25 Gbps */
#define OPA_LINK_WIDTH_1X 0x0001
#define OPA_LINK_WIDTH_2X 0x0002
diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h
deleted file mode 100644
index d297f084001a..000000000000
--- a/include/rdma/opa_vnic.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
-/*
- * Copyright(c) 2017 - 2020 Intel Corporation.
- */
-
-#ifndef _OPA_VNIC_H
-#define _OPA_VNIC_H
-
-/*
- * This file contains Intel Omni-Path (OPA) Virtual Network Interface
- * Controller (VNIC) specific declarations.
- */
-
-#include <rdma/ib_verbs.h>
-
-/* 16 header bytes + 2 reserved bytes */
-#define OPA_VNIC_L2_HDR_LEN (16 + 2)
-
-#define OPA_VNIC_L4_HDR_LEN 2
-
-#define OPA_VNIC_HDR_LEN (OPA_VNIC_L2_HDR_LEN + \
- OPA_VNIC_L4_HDR_LEN)
-
-#define OPA_VNIC_L4_ETHR 0x78
-
-#define OPA_VNIC_ICRC_LEN 4
-#define OPA_VNIC_TAIL_LEN 1
-#define OPA_VNIC_ICRC_TAIL_LEN (OPA_VNIC_ICRC_LEN + OPA_VNIC_TAIL_LEN)
-
-#define OPA_VNIC_SKB_MDATA_LEN 4
-#define OPA_VNIC_SKB_MDATA_ENCAP_ERR 0x1
-
-/* opa vnic rdma netdev's private data structure */
-struct opa_vnic_rdma_netdev {
- struct rdma_netdev rn; /* keep this first */
- /* followed by device private data */
- char *dev_priv[];
-};
-
-static inline void *opa_vnic_priv(const struct net_device *dev)
-{
- struct rdma_netdev *rn = netdev_priv(dev);
-
- return rn->clnt_priv;
-}
-
-static inline void *opa_vnic_dev_priv(const struct net_device *dev)
-{
- struct opa_vnic_rdma_netdev *oparn = netdev_priv(dev);
-
- return oparn->dev_priv;
-}
-
-/* opa_vnic skb meta data structure */
-struct opa_vnic_skb_mdata {
- u8 vl;
- u8 entropy;
- u8 flags;
- u8 rsvd;
-} __packed;
-
-/* OPA VNIC group statistics */
-struct opa_vnic_grp_stats {
- u64 unicast;
- u64 mcastbcast;
- u64 untagged;
- u64 vlan;
- u64 s_64;
- u64 s_65_127;
- u64 s_128_255;
- u64 s_256_511;
- u64 s_512_1023;
- u64 s_1024_1518;
- u64 s_1519_max;
-};
-
-struct opa_vnic_stats {
- /* standard netdev statistics */
- struct rtnl_link_stats64 netstats;
-
- /* OPA VNIC statistics */
- struct opa_vnic_grp_stats tx_grp;
- struct opa_vnic_grp_stats rx_grp;
- u64 tx_dlid_zero;
- u64 tx_drop_state;
- u64 rx_drop_state;
- u64 rx_runt;
- u64 rx_oversize;
-};
-
-static inline bool rdma_cap_opa_vnic(struct ib_device *device)
-{
- return !!(device->attrs.kernel_cap_flags & IBK_RDMA_NETDEV_OPA);
-}
-
-#endif /* _OPA_VNIC_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 8a8ab2f793ab..d639ff889e64 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -33,7 +33,11 @@ enum rdma_cm_event_type {
RDMA_CM_EVENT_MULTICAST_JOIN,
RDMA_CM_EVENT_MULTICAST_ERROR,
RDMA_CM_EVENT_ADDR_CHANGE,
- RDMA_CM_EVENT_TIMEWAIT_EXIT
+ RDMA_CM_EVENT_TIMEWAIT_EXIT,
+ RDMA_CM_EVENT_ADDRINFO_RESOLVED,
+ RDMA_CM_EVENT_ADDRINFO_ERROR,
+ RDMA_CM_EVENT_USER,
+ RDMA_CM_EVENT_INTERNAL,
};
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event);
@@ -63,6 +67,9 @@ struct rdma_route {
* 2 - Both primary and alternate path are available
*/
int num_pri_alt_paths;
+
+ unsigned int num_service_recs;
+ struct sa_service_rec *service_recs;
};
struct rdma_conn_param {
@@ -93,6 +100,7 @@ struct rdma_cm_event {
union {
struct rdma_conn_param conn;
struct rdma_ud_param ud;
+ u64 arg;
} param;
struct rdma_ucm_ece ece;
};
@@ -161,6 +169,23 @@ struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
void rdma_destroy_id(struct rdma_cm_id *id);
/**
+ * rdma_restrict_node_type - Restrict an RDMA identifier to a specific
+ * RDMA device node type.
+ *
+ * @id: RDMA identifier.
+ * @node_type: The device node type. Only RDMA_NODE_UNSPECIFIED (default),
+ * RDMA_NODE_RNIC and RDMA_NODE_IB_CA are allowed
+ *
+ * This allows the caller to restrict the possible devices
+ * used to iWarp (RDMA_NODE_RNIC) or InfiniBand/RoCEv1/RoCEv2 (RDMA_NODE_IB_CA).
+ *
+ * It needs to be called before the RDMA identifier is bound
+ * to a device, which means it should be called before
+ * rdma_bind_addr(), rdma_resolve_addr() and rdma_listen().
+ */
+int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type);
+
+/**
* rdma_bind_addr - Bind an RDMA identifier to a source address and
* associated RDMA device, if needed.
*
@@ -198,6 +223,17 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms);
/**
+ * rdma_resolve_ib_service - Resolve the IB service record of the
+ * service with the given service ID or name.
+ *
+ * This function is optional in the rdma cm flow. It is called on the client
+ * side of a connection, before calling rdma_resolve_route. The resolution
+ * can be done once per rdma_cm_id.
+ */
+int rdma_resolve_ib_service(struct rdma_cm_id *id,
+ struct rdma_ucm_ib_service *ibs);
+
+/**
* rdma_create_qp - Allocate a QP and associate it with the specified RDMA
* identifier.
*
@@ -388,6 +424,5 @@ void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
union ib_gid *dgid);
struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *cm_id);
-struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res);
#endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
index 45d5481a7846..4204d08a010a 100644
--- a/include/rdma/rdma_counter.h
+++ b/include/rdma/rdma_counter.h
@@ -23,6 +23,7 @@ struct rdma_counter_mode {
enum rdma_nl_counter_mode mode;
enum rdma_nl_counter_mask mask;
struct auto_mode_param param;
+ bool bind_opcnt;
};
struct rdma_port_counter {
@@ -47,9 +48,10 @@ void rdma_counter_init(struct ib_device *dev);
void rdma_counter_release(struct ib_device *dev);
int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mask mask,
+ bool bind_opcnt,
struct netlink_ext_ack *extack);
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port);
-int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);
+int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force);
int rdma_counter_query_stats(struct rdma_counter *counter);
u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index);
@@ -61,7 +63,8 @@ int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
u32 qp_num, u32 counter_id);
int rdma_counter_get_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mode *mode,
- enum rdma_nl_counter_mask *mask);
+ enum rdma_nl_counter_mask *mask,
+ bool *opcnt);
int rdma_counter_modify(struct ib_device *dev, u32 port,
unsigned int index, bool enable);
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 326deaf56d5d..2fd1358ea57d 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -5,6 +5,7 @@
#include <linux/netlink.h>
#include <uapi/rdma/rdma_netlink.h>
+#include <rdma/ib_verbs.h>
struct ib_device;
@@ -126,6 +127,7 @@ struct rdma_link_ops {
struct list_head list;
const char *type;
int (*newlink)(const char *ibdev_name, struct net_device *ndev);
+ int (*dellink)(struct ib_device *dev);
};
void rdma_link_register(struct rdma_link_ops *ops);
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index c429d6ddb129..7d8de561f71b 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -149,6 +149,7 @@ struct rvt_driver_params {
/* User context */
struct rvt_ucontext {
struct ib_ucontext ibucontext;
+ void *priv;
};
/* Protection domain */
@@ -359,6 +360,15 @@ struct rvt_driver_provided {
/* Get and return CPU to pin CQ processing thread */
int (*comp_vect_cpu_lookup)(struct rvt_dev_info *rdi, int comp_vect);
+
+ /* allocate a ucontext */
+ int (*alloc_ucontext)(struct ib_ucontext *uctx, struct ib_udata *udata);
+
+ /* deallocate a ucontext */
+ void (*dealloc_ucontext)(struct ib_ucontext *context);
+
+ /* driver mmap */
+ int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
};
struct rvt_dev_info {
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index d67892944193..71140ea0aeb2 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -144,7 +144,7 @@
#define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1)
/**
- * rvt_ud_wr - IB UD work plus AH cache
+ * struct rvt_ud_wr - IB UD work plus AH cache
* @wr: valid IB work request
* @attr: pointer to an allocated AH attribute
*
@@ -184,10 +184,10 @@ struct rvt_swqe {
* struct rvt_krwq - kernel struct receive work request
* @p_lock: lock to protect producer of the kernel buffer
* @head: index of next entry to fill
- * @c_lock:lock to protect consumer of the kernel buffer
+ * @c_lock: lock to protect consumer of the kernel buffer
* @tail: index of next entry to pull
- * @count: count is aproximate of total receive enteries posted
- * @rvt_rwqe: struct of receive work request queue entry
+ * @count: count is approximate of total receive entries posted
+ * @curr_wq: struct of receive work request queue entry
*
* This structure is used to contain the head pointer,
* tail pointer and receive work queue entries for kernel
@@ -309,10 +309,10 @@ struct rvt_ack_entry {
#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
/**
- * rvt_operation_params - op table entry
- * @length - the length to copy into the swqe entry
- * @qpt_support - a bit mask indicating QP type support
- * @flags - RVT_OPERATION flags (see above)
+ * struct rvt_operation_params - op table entry
+ * @length: the length to copy into the swqe entry
+ * @qpt_support: a bit mask indicating QP type support
+ * @flags: RVT_OPERATION flags (see above)
*
* This supports table driven post send so that
* the driver can have differing an potentially
@@ -552,7 +552,7 @@ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n)
/**
* rvt_is_user_qp - return if this is user mode QP
- * @qp - the target QP
+ * @qp: the target QP
*/
static inline bool rvt_is_user_qp(struct rvt_qp *qp)
{
@@ -561,7 +561,7 @@ static inline bool rvt_is_user_qp(struct rvt_qp *qp)
/**
* rvt_get_qp - get a QP reference
- * @qp - the QP to hold
+ * @qp: the QP to hold
*/
static inline void rvt_get_qp(struct rvt_qp *qp)
{
@@ -570,7 +570,7 @@ static inline void rvt_get_qp(struct rvt_qp *qp)
/**
* rvt_put_qp - release a QP reference
- * @qp - the QP to release
+ * @qp: the QP to release
*/
static inline void rvt_put_qp(struct rvt_qp *qp)
{
@@ -580,7 +580,7 @@ static inline void rvt_put_qp(struct rvt_qp *qp)
/**
* rvt_put_swqe - drop mr refs held by swqe
- * @wqe - the send wqe
+ * @wqe: the send wqe
*
* This drops any mr references held by the swqe
*/
@@ -597,8 +597,8 @@ static inline void rvt_put_swqe(struct rvt_swqe *wqe)
/**
* rvt_qp_wqe_reserve - reserve operation
- * @qp - the rvt qp
- * @wqe - the send wqe
+ * @qp: the rvt qp
+ * @wqe: the send wqe
*
* This routine used in post send to record
* a wqe relative reserved operation use.
@@ -612,8 +612,8 @@ static inline void rvt_qp_wqe_reserve(
/**
* rvt_qp_wqe_unreserve - clean reserved operation
- * @qp - the rvt qp
- * @flags - send wqe flags
+ * @qp: the rvt qp
+ * @flags: send wqe flags
*
* This decrements the reserve use count.
*
@@ -653,8 +653,8 @@ u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
/**
* rvt_div_round_up_mtu - round up divide
- * @qp - the qp pair
- * @len - the length
+ * @qp: the qp pair
+ * @len: the length
*
* Perform a shift based mtu round up divide
*/
@@ -664,8 +664,9 @@ static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len)
}
/**
- * @qp - the qp pair
- * @len - the length
+ * rvt_div_mtu - shift-based divide
+ * @qp: the qp pair
+ * @len: the length
*
* Perform a shift based mtu divide
*/
@@ -676,7 +677,7 @@ static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len)
/**
* rvt_timeout_to_jiffies - Convert a ULP timeout input into jiffies
- * @timeout - timeout input(0 - 31).
+ * @timeout: timeout input(0 - 31).
*
* Return a timeout value in jiffies.
*/
@@ -690,7 +691,8 @@ static inline unsigned long rvt_timeout_to_jiffies(u8 timeout)
/**
* rvt_lookup_qpn - return the QP with the given QPN
- * @ibp: the ibport
+ * @rdi: rvt device info structure
+ * @rvp: the ibport
* @qpn: the QP number to look up
*
* The caller must hold the rcu_read_lock(), and keep the lock until
@@ -716,9 +718,9 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
}
/**
- * rvt_mod_retry_timer - mod a retry timer
- * @qp - the QP
- * @shift - timeout shift to wait for multiple packets
+ * rvt_mod_retry_timer_ext - mod a retry timer
+ * @qp: the QP
+ * @shift: timeout shift to wait for multiple packets
* Modify a potentially already running retry timer
*/
static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
@@ -753,7 +755,7 @@ static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
}
/**
- * rvt_qp_sqwe_incr - increment ring index
+ * rvt_qp_swqe_incr - increment ring index
* @qp: the qp
* @val: the starting value
*
@@ -811,10 +813,10 @@ static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc,
/**
* rvt_qp_complete_swqe - insert send completion
- * @qp - the qp
- * @wqe - the send wqe
- * @opcode - wc operation (driver dependent)
- * @status - completion status
+ * @qp: the qp
+ * @wqe: the send wqe
+ * @opcode: wc operation (driver dependent)
+ * @status: completion status
*
* Update the s_last information, and then insert a send
* completion into the completion
@@ -891,7 +893,7 @@ void rvt_ruc_loopback(struct rvt_qp *qp);
/**
* struct rvt_qp_iter - the iterator for QPs
- * @qp - the current QP
+ * @qp: the current QP
*
* This structure defines the current iterator
* state for sequenced access to all QPs relative
@@ -913,7 +915,7 @@ struct rvt_qp_iter {
/**
* ib_cq_tail - Return tail index of cq buffer
- * @send_cq - The cq for send
+ * @send_cq: The cq for send
*
* This is called in qp_iter_print to get tail
* of cq buffer.
@@ -929,7 +931,7 @@ static inline u32 ib_cq_tail(struct ib_cq *send_cq)
/**
* ib_cq_head - Return head index of cq buffer
- * @send_cq - The cq for send
+ * @send_cq: The cq for send
*
* This is called in qp_iter_print to get head
* of cq buffer.
@@ -945,7 +947,7 @@ static inline u32 ib_cq_head(struct ib_cq *send_cq)
/**
* rvt_free_rq - free memory allocated for rvt_rq struct
- * @rvt_rq: request queue data structure
+ * @rq: request queue data structure
*
* This function should only be called if the rvt_mmap_info()
* has not succeeded.
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 0d69ded73bf2..451f99e3717d 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -57,6 +57,10 @@ enum rdma_restrack_type {
*/
RDMA_RESTRACK_SRQ,
/**
+ * @RDMA_RESTRACK_DMAH: DMA handle
+ */
+ RDMA_RESTRACK_DMAH,
+ /**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
RDMA_RESTRACK_MAX
@@ -83,11 +87,11 @@ struct rdma_restrack_entry {
* query stage.
*/
u8 no_track : 1;
- /*
+ /**
* @kref: Protect destroy of the resource
*/
struct kref kref;
- /*
+ /**
* @comp: Signal that all consumers of resource are completed their work
*/
struct completion comp;
diff --git a/include/rdma/rw.h b/include/rdma/rw.h
index d606cac48233..6a1d08614e09 100644
--- a/include/rdma/rw.h
+++ b/include/rdma/rw.h
@@ -5,6 +5,7 @@
#ifndef _RDMA_RW_H
#define _RDMA_RW_H
+#include <linux/bvec.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>
@@ -31,6 +32,14 @@ struct rdma_rw_ctx {
struct ib_rdma_wr *wrs;
} map;
+ /* for IOVA-based mapping of bvecs into contiguous DMA range: */
+ struct {
+ struct dma_iova_state state;
+ struct ib_sge sge;
+ struct ib_rdma_wr wr;
+ size_t mapped_len;
+ } iova;
+
/* for registering multiple WRs: */
struct rdma_rw_reg_ctx {
struct ib_sge sge;
@@ -38,6 +47,7 @@ struct rdma_rw_ctx {
struct ib_reg_wr reg_wr;
struct ib_send_wr inv_wr;
struct ib_mr *mr;
+ struct sg_table sgt;
} *reg;
};
};
@@ -49,6 +59,16 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u32 port_num, struct scatterlist *sg, u32 sg_cnt,
enum dma_data_direction dir);
+struct bio_vec;
+
+int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
+ struct bvec_iter iter, u64 remote_addr, u32 rkey,
+ enum dma_data_direction dir);
+void rdma_rw_ctx_destroy_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
+ enum dma_data_direction dir);
+
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
@@ -66,6 +86,8 @@ int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
unsigned int maxpages);
+unsigned int rdma_rw_max_send_wr(struct ib_device *dev, u32 port_num,
+ unsigned int max_rdma_ctxs, u32 create_flags);
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
void rdma_rw_cleanup_mrs(struct ib_qp *qp);
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index e6c0de227fad..e2af17da3e32 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -667,6 +667,8 @@ rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata)
(udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \
drv_dev_struct, member) : (drv_dev_struct *)NULL)
+struct ib_device *rdma_udata_to_dev(struct ib_udata *udata);
+
#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT)
static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
@@ -895,6 +897,10 @@ int _uverbs_get_const_unsigned(u64 *to,
size_t idx, u64 upper_bound, u64 *def_val);
int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
size_t idx, const void *from, size_t size);
+
+int _ib_copy_validate_udata_in(struct ib_udata *udata, void *req,
+ size_t kernel_size, size_t minimum_size);
+int _ib_respond_udata(struct ib_udata *udata, const void *src, size_t len);
#else
static inline int
uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -951,6 +957,19 @@ _uverbs_get_const_unsigned(u64 *to,
{
return -EINVAL;
}
+
+static inline int _ib_copy_validate_udata_in(struct ib_udata *udata, void *req,
+ size_t kernel_size,
+ size_t minimum_size)
+{
+ return -EINVAL;
+}
+
+static inline int _ib_respond_udata(struct ib_udata *udata, const void *src,
+ size_t len)
+{
+ return -EINVAL;
+}
#endif
#define uverbs_get_const_signed(_to, _attrs_bundle, _idx) \
@@ -1016,4 +1035,86 @@ uverbs_get_raw_fd(int *to, const struct uverbs_attr_bundle *attrs_bundle,
return uverbs_get_const_signed(to, attrs_bundle, idx);
}
+/**
+ * ib_copy_validate_udata_in - Copy and validate that the request structure is
+ * compatible with this kernel
+ * @_udata: The system call's ib_udata struct
+ * @_req: The name of an on-stack structure that holds the driver data
+ * @_end_member: The member in the struct that is the original end of struct
+ * from the first kernel to introduce it.
+ *
+ * Check that the udata input request struct is properly formed for this kernel.
+ * Then copy it into req
+ */
+#define ib_copy_validate_udata_in(_udata, _req, _end_member) \
+ _ib_copy_validate_udata_in(_udata, &(_req), sizeof(_req), \
+ offsetofend(typeof(_req), _end_member))
+
+int _ib_copy_validate_udata_cm_fail(struct ib_udata *udata, u64 req_cm,
+ u64 valid_cm);
+
+/**
+ * ib_copy_validate_udata_in_cm - Copy the req structure and check the comp_mask
+ * @_udata: The system call's ib_udata struct
+ * @_req: The name of an on-stack structure that holds the driver data
+ * @_end_member: The member in the struct that is the original end of struct
+ * from the first kernel to introduce it.
+ * @_valid_cm: A bitmask of bits permitted in the comp_mask field.
+ *
+ * Check that the udata input request struct is properly formed for this kernel.
+ * Then copy it into req
+ */
+#define ib_copy_validate_udata_in_cm(_udata, _req, _end_member, _valid_cm) \
+ ({ \
+ typeof((_req).comp_mask) __valid_cm = _valid_cm; \
+ int ret = \
+ ib_copy_validate_udata_in(_udata, _req, _end_member); \
+ if (!ret && ((_req).comp_mask & ~__valid_cm)) \
+ ret = _ib_copy_validate_udata_cm_fail( \
+ _udata, (_req).comp_mask, __valid_cm); \
+ ret; \
+ })
+
+/**
+ * ib_is_udata_in_empty - Check if the udata input buffer is all zeros
+ * @udata: The system call's ib_udata struct
+ *
+ * This should be used if the driver does not currently define a driver data
+ * struct. Returns 0 if the buffer is empty or all zeros, -EOPNOTSUPP if
+ * non-zero data is present, or a negative error code on failure.
+ */
+static inline int ib_is_udata_in_empty(struct ib_udata *udata)
+{
+ if (!udata || udata->inlen == 0)
+ return 0;
+ return _ib_copy_validate_udata_in(udata, NULL, 0, 0);
+}
+
+/**
+ * ib_respond_udata - Copy a driver data response to userspace
+ * @_udata: The system call's ib_udata struct
+ * @_rep: Kernel buffer containing the response driver data on the stack
+ *
+ * Copy driver data response structures back to userspace in a way that
+ * is forwards and backwards compatible. Longer kernel structs are truncated;
+ * userspace has made some kind of error if it needed the truncated information.
+ * Shorter structs are zero padded.
+ */
+#define ib_respond_udata(_udata, _rep) \
+ _ib_respond_udata(_udata, &(_rep), sizeof(_rep))
+
+/**
+ * ib_respond_empty_udata - Zero fill the response buffer to userspace
+ * @udata: The system call's ib_udata struct
+ *
+ * Used when there is no driver response data to return. Provides forward
+ * compatibility by zeroing any buffer the user may have provided.
+ */
+static inline int ib_respond_empty_udata(struct ib_udata *udata)
+{
+ if (udata && udata->outlen && clear_user(udata->outbuf, udata->outlen))
+ return -EFAULT;
+ return 0;
+}
+
#endif
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index fe0512116958..555ea3d142a4 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -34,7 +34,7 @@
static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
{
if (IS_ERR(uobj))
- return NULL;
+ return ERR_CAST(uobj);
return uobj->object;
}
#define uobj_get_obj_read(_object, _type, _id, _attrs) \
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 26ba919ac245..6a253b7dc5ea 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -186,6 +186,7 @@ struct ib_uverbs_file {
extern const struct uverbs_obj_type_class uverbs_idr_class;
extern const struct uverbs_obj_type_class uverbs_fd_class;
int uverbs_uobject_fd_release(struct inode *inode, struct file *filp);
+int uverbs_uobject_release(struct ib_uobject *uobj);
#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
sizeof(char))