From 6a9af2e18a5c6ebcf8283309d20ac0e9fa35e346 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Sat, 17 Jun 2006 20:37:27 -0700 Subject: IB: common handling for marshalling parameters to/from userspace Provide common handling for marshalling data between userspace clients and kernel InfiniBand drivers. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_marshall.h | 50 ++++++++++++++++++++++++++ include/rdma/ib_user_cm.h | 84 +++----------------------------------------- include/rdma/ib_user_sa.h | 60 +++++++++++++++++++++++++++++++ include/rdma/ib_user_verbs.h | 80 ++++++++++++++++++++++++++++++----------- 4 files changed, 174 insertions(+), 100 deletions(-) create mode 100644 include/rdma/ib_marshall.h create mode 100644 include/rdma/ib_user_sa.h (limited to 'include') diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h new file mode 100644 index 000000000000..66bf4d7d0dfb --- /dev/null +++ b/include/rdma/ib_marshall.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(IB_USER_MARSHALL_H) +#define IB_USER_MARSHALL_H + +#include +#include +#include +#include + +void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, + struct ib_qp_attr *src); + +void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, + struct ib_sa_path_rec *src); + +void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, + struct ib_user_path_rec *src); + +#endif /* IB_USER_MARSHALL_H */ diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h index 19be116047f6..a9e1b22d245c 100644 --- a/include/rdma/ib_user_cm.h +++ b/include/rdma/ib_user_cm.h @@ -30,13 +30,13 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: ib_user_cm.h 2576 2005-06-09 17:00:30Z libor $ + * $Id: ib_user_cm.h 4019 2005-11-11 00:33:09Z sean.hefty $ */ #ifndef IB_USER_CM_H #define IB_USER_CM_H -#include +#include #define IB_USER_CM_ABI_VERSION 4 @@ -110,58 +110,6 @@ struct ib_ucm_init_qp_attr { __u32 qp_state; }; -struct ib_ucm_ah_attr { - __u8 grh_dgid[16]; - __u32 grh_flow_label; - __u16 dlid; - __u16 reserved; - __u8 grh_sgid_index; - __u8 grh_hop_limit; - __u8 grh_traffic_class; - __u8 sl; - __u8 src_path_bits; - __u8 static_rate; - __u8 is_global; - __u8 port_num; -}; - -struct ib_ucm_init_qp_attr_resp { - __u32 qp_attr_mask; - __u32 qp_state; - __u32 cur_qp_state; - __u32 path_mtu; - __u32 path_mig_state; - __u32 qkey; - __u32 rq_psn; - __u32 sq_psn; - __u32 dest_qp_num; - __u32 qp_access_flags; - - struct ib_ucm_ah_attr ah_attr; - struct ib_ucm_ah_attr alt_ah_attr; - - /* ib_qp_cap */ - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - - __u16 pkey_index; - __u16 alt_pkey_index; - __u8 en_sqd_async_notify; - __u8 sq_draining; - __u8 max_rd_atomic; - __u8 max_dest_rd_atomic; - __u8 min_rnr_timer; - __u8 port_num; - __u8 timeout; - __u8 retry_cnt; - __u8 rnr_retry; - __u8 alt_port_num; - __u8 alt_timeout; -}; - struct ib_ucm_listen { __be64 service_id; __be64 service_mask; @@ -180,28 +128,6 @@ struct ib_ucm_private_data { __u8 reserved[3]; }; -struct ib_ucm_path_rec { - __u8 dgid[16]; - __u8 sgid[16]; - __be16 dlid; - __be16 slid; - __u32 raw_traffic; - __be32 flow_label; - __u32 reversible; - __u32 mtu; - __be16 pkey; - __u8 hop_limit; - __u8 traffic_class; - __u8 numb_path; - __u8 sl; - __u8 mtu_selector; - __u8 rate_selector; - __u8 rate; - __u8 packet_life_time_selector; - __u8 packet_life_time; - __u8 preference; -}; - struct ib_ucm_req { __u32 id; __u32 qpn; @@ -304,8 +230,8 @@ struct ib_ucm_event_get { }; struct ib_ucm_req_event_resp { - struct ib_ucm_path_rec primary_path; - struct ib_ucm_path_rec alternate_path; + struct ib_user_path_rec primary_path; + struct ib_user_path_rec alternate_path; __be64 remote_ca_guid; __u32 remote_qkey; __u32 remote_qpn; @@ -349,7 +275,7 @@ struct ib_ucm_mra_event_resp { }; struct ib_ucm_lap_event_resp { - struct ib_ucm_path_rec path; + struct ib_user_path_rec path; }; struct ib_ucm_apr_event_resp { diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h new file mode 100644 index 000000000000..659120157e14 --- /dev/null +++ b/include/rdma/ib_user_sa.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_SA_H +#define IB_USER_SA_H + +#include + +struct ib_user_path_rec { + __u8 dgid[16]; + __u8 sgid[16]; + __be16 dlid; + __be16 slid; + __u32 raw_traffic; + __be32 flow_label; + __u32 reversible; + __u32 mtu; + __be16 pkey; + __u8 hop_limit; + __u8 traffic_class; + __u8 numb_path; + __u8 sl; + __u8 mtu_selector; + __u8 rate_selector; + __u8 rate; + __u8 packet_life_time_selector; + __u8 packet_life_time; + __u8 preference; +}; + +#endif /* IB_USER_SA_H */ diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 338ed4333063..7b5372010f4b 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -32,7 +32,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $ + * $Id: ib_user_verbs.h 4019 2005-11-11 00:33:09Z sean.hefty $ */ #ifndef IB_USER_VERBS_H @@ -323,6 +323,64 @@ struct ib_uverbs_destroy_cq_resp { __u32 async_events_reported; }; +struct ib_uverbs_global_route { + __u8 dgid[16]; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 reserved; +}; + +struct ib_uverbs_ah_attr { + struct ib_uverbs_global_route grh; + __u16 dlid; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_qp_attr { + __u32 qp_attr_mask; + __u32 qp_state; + __u32 cur_qp_state; + __u32 path_mtu; + __u32 path_mig_state; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + + struct ib_uverbs_ah_attr ah_attr; + struct ib_uverbs_ah_attr alt_ah_attr; + + /* ib_qp_cap */ + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 en_sqd_async_notify; + __u8 sq_draining; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[5]; +}; + struct ib_uverbs_create_qp { __u64 response; __u64 user_handle; @@ -541,26 +599,6 @@ struct ib_uverbs_post_srq_recv_resp { __u32 bad_wr; }; -struct ib_uverbs_global_route { - __u8 dgid[16]; - __u32 flow_label; - __u8 sgid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 reserved; -}; - -struct ib_uverbs_ah_attr { - struct ib_uverbs_global_route grh; - __u16 dlid; - __u8 sl; - __u8 src_path_bits; - __u8 static_rate; - __u8 is_global; - __u8 port_num; - __u8 reserved; -}; - struct ib_uverbs_create_ah { __u64 response; __u64 user_handle; -- cgit v1.2.3 From 6e61d04f2d8c7ac4f67e1f498ed2a2a3ad8edaa3 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Sat, 17 Jun 2006 20:37:28 -0700 Subject: IB/cm: Match connection requests based on private data Extend matching connection requests to listens in the InfiniBand CM to include private data checks. This allows applications to listen on the same service identifier, with private data directing the request to the appropriate application. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 91 +++++++++++++++++++++++++++++++++++++------ drivers/infiniband/core/ucm.c | 19 ++++++++- include/rdma/ib_cm.h | 23 ++++++++--- 3 files changed, 114 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 86fee43502cd..490fd03766db 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -32,7 +32,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $ + * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $ */ #include @@ -132,6 +132,7 @@ struct cm_id_private { /* todo: use alternate port on send failure */ struct cm_av av; struct cm_av alt_av; + struct ib_cm_compare_data *compare_data; void *private_data; __be64 tid; @@ -357,6 +358,41 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) return cm_id_priv; } +static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask) +{ + int i; + + for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++) + ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] & + ((unsigned long *) mask)[i]; +} + +static int cm_compare_data(struct ib_cm_compare_data *src_data, + struct ib_cm_compare_data *dst_data) +{ + u8 src[IB_CM_COMPARE_SIZE]; + u8 dst[IB_CM_COMPARE_SIZE]; + + if (!src_data || !dst_data) + return 0; + + cm_mask_copy(src, src_data->data, dst_data->mask); + cm_mask_copy(dst, dst_data->data, src_data->mask); + return memcmp(src, dst, IB_CM_COMPARE_SIZE); +} + +static int cm_compare_private_data(u8 *private_data, + struct ib_cm_compare_data *dst_data) +{ + u8 src[IB_CM_COMPARE_SIZE]; + + if (!dst_data) + return 0; + + cm_mask_copy(src, private_data, dst_data->mask); + return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE); +} + static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.listen_service_table.rb_node; @@ -364,14 +400,18 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; __be64 service_mask = cm_id_priv->id.service_mask; + int data_cmp; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); + data_cmp = cm_compare_data(cm_id_priv->compare_data, + cur_cm_id_priv->compare_data); if ((cur_cm_id_priv->id.service_mask & service_id) == (service_mask & cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device)) + (cm_id_priv->id.device == cur_cm_id_priv->id.device) && + !data_cmp) return cur_cm_id_priv; if (cm_id_priv->id.device < cur_cm_id_priv->id.device) @@ -380,6 +420,10 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) link = &(*link)->rb_right; else if (service_id < cur_cm_id_priv->id.service_id) link = &(*link)->rb_left; + else if (service_id > cur_cm_id_priv->id.service_id) + link = &(*link)->rb_right; + else if (data_cmp < 0) + link = &(*link)->rb_left; else link = &(*link)->rb_right; } @@ -389,16 +433,20 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) } static struct cm_id_private * cm_find_listen(struct ib_device *device, - __be64 service_id) + __be64 service_id, + u8 *private_data) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; + int data_cmp; while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); + data_cmp = cm_compare_private_data(private_data, + cm_id_priv->compare_data); if ((cm_id_priv->id.service_mask & service_id) == cm_id_priv->id.service_id && - (cm_id_priv->id.device == device)) + (cm_id_priv->id.device == device) && !data_cmp) return cm_id_priv; if (device < cm_id_priv->id.device) @@ -407,6 +455,10 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device, node = node->rb_right; else if (service_id < cm_id_priv->id.service_id) node = node->rb_left; + else if (service_id > cm_id_priv->id.service_id) + node = node->rb_right; + else if (data_cmp < 0) + node = node->rb_left; else node = node->rb_right; } @@ -730,15 +782,14 @@ retest: wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); - if (cm_id_priv->private_data && cm_id_priv->private_data_len) - kfree(cm_id_priv->private_data); + kfree(cm_id_priv->compare_data); + kfree(cm_id_priv->private_data); kfree(cm_id_priv); } EXPORT_SYMBOL(ib_destroy_cm_id); -int ib_cm_listen(struct ib_cm_id *cm_id, - __be64 service_id, - __be64 service_mask) +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, + struct ib_cm_compare_data *compare_data) { struct cm_id_private *cm_id_priv, *cur_cm_id_priv; unsigned long flags; @@ -752,7 +803,19 @@ int ib_cm_listen(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - BUG_ON(cm_id->state != IB_CM_IDLE); + if (cm_id->state != IB_CM_IDLE) + return -EINVAL; + + if (compare_data) { + cm_id_priv->compare_data = kzalloc(sizeof *compare_data, + GFP_KERNEL); + if (!cm_id_priv->compare_data) + return -ENOMEM; + cm_mask_copy(cm_id_priv->compare_data->data, + compare_data->data, compare_data->mask); + memcpy(cm_id_priv->compare_data->mask, compare_data->mask, + IB_CM_COMPARE_SIZE); + } cm_id->state = IB_CM_LISTEN; @@ -769,6 +832,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id, if (cur_cm_id_priv) { cm_id->state = IB_CM_IDLE; + kfree(cm_id_priv->compare_data); + cm_id_priv->compare_data = NULL; ret = -EBUSY; } return ret; @@ -1241,7 +1306,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, /* Find matching listen request. */ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, - req_msg->service_id); + req_msg->service_id, + req_msg->private_data); if (!listen_cm_id_priv) { spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, @@ -2654,7 +2720,8 @@ static int cm_sidr_req_handler(struct cm_work *work) goto out; /* Duplicate message. */ } cur_cm_id_priv = cm_find_listen(cm_id->device, - sidr_req_msg->service_id); + sidr_req_msg->service_id, + sidr_req_msg->private_data); if (!cur_cm_id_priv) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irqrestore(&cm.lock, flags); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index b396bf703f80..0136aee0faa7 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -648,6 +648,17 @@ out: return result; } +static int ucm_validate_listen(__be64 service_id, __be64 service_mask) +{ + service_id &= service_mask; + + if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || + ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) + return -EINVAL; + + return 0; +} + static ssize_t ib_ucm_listen(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) @@ -663,7 +674,13 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); - result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); + result = ucm_validate_listen(cmd.service_id, cmd.service_mask); + if (result) + goto out; + + result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask, + NULL); +out: ib_ucm_ctx_put(ctx); return result; } diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 0a9fcd59eb43..8f394f035684 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -32,7 +32,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: ib_cm.h 2730 2005-06-28 16:43:03Z sean.hefty $ + * $Id: ib_cm.h 4311 2005-12-05 18:42:01Z sean.hefty $ */ #if !defined(IB_CM_H) #define IB_CM_H @@ -102,7 +102,8 @@ enum ib_cm_data_size { IB_CM_APR_INFO_LENGTH = 72, IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, - IB_CM_SIDR_REP_INFO_LENGTH = 72 + IB_CM_SIDR_REP_INFO_LENGTH = 72, + IB_CM_COMPARE_SIZE = 64 }; struct ib_cm_id; @@ -238,7 +239,6 @@ struct ib_cm_sidr_rep_event_param { u32 qpn; void *info; u8 info_len; - }; struct ib_cm_event { @@ -317,6 +317,15 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); #define IB_SERVICE_ID_AGN_MASK __constant_cpu_to_be64(0xFF00000000000000ULL) #define IB_CM_ASSIGN_SERVICE_ID __constant_cpu_to_be64(0x0200000000000000ULL) +#define IB_CMA_SERVICE_ID __constant_cpu_to_be64(0x0000000001000000ULL) +#define IB_CMA_SERVICE_ID_MASK __constant_cpu_to_be64(0xFFFFFFFFFF000000ULL) +#define IB_SDP_SERVICE_ID __constant_cpu_to_be64(0x0000000000010000ULL) +#define IB_SDP_SERVICE_ID_MASK __constant_cpu_to_be64(0xFFFFFFFFFFFF0000ULL) + +struct ib_cm_compare_data { + u8 data[IB_CM_COMPARE_SIZE]; + u8 mask[IB_CM_COMPARE_SIZE]; +}; /** * ib_cm_listen - Initiates listening on the specified service ID for @@ -330,10 +339,12 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); * range of service IDs. If set to 0, the service ID is matched * exactly. This parameter is ignored if %service_id is set to * IB_CM_ASSIGN_SERVICE_ID. + * @compare_data: This parameter is optional. It specifies data that must + * appear in the private data of a connection request for the specified + * listen request. */ -int ib_cm_listen(struct ib_cm_id *cm_id, - __be64 service_id, - __be64 service_mask); +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, + struct ib_cm_compare_data *compare_data); struct ib_cm_req_param { struct ib_sa_path_rec *primary_path; -- cgit v1.2.3 From 7025fcd36bd62af2c6ca0ea3490c00b216c4d168 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Sat, 17 Jun 2006 20:37:28 -0700 Subject: IB: address translation to map IP toIB addresses (GIDs) Add an address translation service that maps IP addresses to InfiniBand GID addresses using IPoIB. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/Kconfig | 5 + drivers/infiniband/core/Makefile | 6 +- drivers/infiniband/core/addr.c | 367 +++++++++++++++++++++++++++++++++++++++ include/rdma/ib_addr.h | 114 ++++++++++++ 4 files changed, 491 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/core/addr.c create mode 100644 include/rdma/ib_addr.h (limited to 'include') diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index afc612b8577d..ba2d6505e9a4 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS libibverbs, libibcm and a hardware driver library from . +config INFINIBAND_ADDR_TRANS + bool + depends on INFINIBAND && INET + default y + source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/ipath/Kconfig" diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index a56bf9c994f6..05095919d168 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,5 +1,7 @@ +addr_trans-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o + obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o + ib_cm.o $(addr_trans-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o @@ -12,6 +14,8 @@ ib_sa-y := sa_query.o ib_cm-y := cm.o +ib_addr-y := addr.o + ib_umad-y := user_mad.o ib_ucm-y := ucm.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c new file mode 100644 index 000000000000..d294bbc42f09 --- /dev/null +++ b/drivers/infiniband/core/addr.c @@ -0,0 +1,367 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. + * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("IB Address Translation"); +MODULE_LICENSE("Dual BSD/GPL"); + +struct addr_req { + struct list_head list; + struct sockaddr src_addr; + struct sockaddr dst_addr; + struct rdma_dev_addr *addr; + void *context; + void (*callback)(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context); + unsigned long timeout; + int status; +}; + +static void process_req(void *data); + +static DEFINE_MUTEX(lock); +static LIST_HEAD(req_list); +static DECLARE_WORK(work, process_req, NULL); +static struct workqueue_struct *addr_wq; + +static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, + unsigned char *dst_dev_addr) +{ + switch (dev->type) { + case ARPHRD_INFINIBAND: + dev_addr->dev_type = IB_NODE_CA; + break; + default: + return -EADDRNOTAVAIL; + } + + memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); + if (dst_dev_addr) + memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); + return 0; +} + +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +{ + struct net_device *dev; + u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; + int ret; + + dev = ip_dev_find(ip); + if (!dev) + return -EADDRNOTAVAIL; + + ret = copy_addr(dev_addr, dev, NULL); + dev_put(dev); + return ret; +} +EXPORT_SYMBOL(rdma_translate_ip); + +static void set_timeout(unsigned long time) +{ + unsigned long delay; + + cancel_delayed_work(&work); + + delay = time - jiffies; + if ((long)delay <= 0) + delay = 1; + + queue_delayed_work(addr_wq, &work, delay); +} + +static void queue_req(struct addr_req *req) +{ + struct addr_req *temp_req; + + mutex_lock(&lock); + list_for_each_entry_reverse(temp_req, &req_list, list) { + if (time_after(req->timeout, temp_req->timeout)) + break; + } + + list_add(&req->list, &temp_req->list); + + if (req_list.next == &req->list) + set_timeout(req->timeout); + mutex_unlock(&lock); +} + +static void addr_send_arp(struct sockaddr_in *dst_in) +{ + struct rtable *rt; + struct flowi fl; + u32 dst_ip = dst_in->sin_addr.s_addr; + + memset(&fl, 0, sizeof fl); + fl.nl_u.ip4_u.daddr = dst_ip; + if (ip_route_output_key(&rt, &fl)) + return; + + arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev, + rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL); + ip_rt_put(rt); +} + +static int addr_resolve_remote(struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + struct rdma_dev_addr *addr) +{ + u32 src_ip = src_in->sin_addr.s_addr; + u32 dst_ip = dst_in->sin_addr.s_addr; + struct flowi fl; + struct rtable *rt; + struct neighbour *neigh; + int ret; + + memset(&fl, 0, sizeof fl); + fl.nl_u.ip4_u.daddr = dst_ip; + fl.nl_u.ip4_u.saddr = src_ip; + ret = ip_route_output_key(&rt, &fl); + if (ret) + goto out; + + /* If the device does ARP internally, return 'done' */ + if (rt->idev->dev->flags & IFF_NOARP) { + copy_addr(addr, rt->idev->dev, NULL); + goto put; + } + + neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); + if (!neigh) { + ret = -ENODATA; + goto put; + } + + if (!(neigh->nud_state & NUD_VALID)) { + ret = -ENODATA; + goto release; + } + + if (!src_ip) { + src_in->sin_family = dst_in->sin_family; + src_in->sin_addr.s_addr = rt->rt_src; + } + + ret = copy_addr(addr, neigh->dev, neigh->ha); +release: + neigh_release(neigh); +put: + ip_rt_put(rt); +out: + return ret; +} + +static void process_req(void *data) +{ + struct addr_req *req, *temp_req; + struct sockaddr_in *src_in, *dst_in; + struct list_head done_list; + + INIT_LIST_HEAD(&done_list); + + mutex_lock(&lock); + list_for_each_entry_safe(req, temp_req, &req_list, list) { + if (req->status) { + src_in = (struct sockaddr_in *) &req->src_addr; + dst_in = (struct sockaddr_in *) &req->dst_addr; + req->status = addr_resolve_remote(src_in, dst_in, + req->addr); + } + if (req->status && time_after(jiffies, req->timeout)) + req->status = -ETIMEDOUT; + else if (req->status == -ENODATA) + continue; + + list_del(&req->list); + list_add_tail(&req->list, &done_list); + } + + if (!list_empty(&req_list)) { + req = list_entry(req_list.next, struct addr_req, list); + set_timeout(req->timeout); + } + mutex_unlock(&lock); + + list_for_each_entry_safe(req, temp_req, &done_list, list) { + list_del(&req->list); + req->callback(req->status, &req->src_addr, req->addr, + req->context); + kfree(req); + } +} + +static int addr_resolve_local(struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + struct rdma_dev_addr *addr) +{ + struct net_device *dev; + u32 src_ip = src_in->sin_addr.s_addr; + u32 dst_ip = dst_in->sin_addr.s_addr; + int ret; + + dev = ip_dev_find(dst_ip); + if (!dev) + return -EADDRNOTAVAIL; + + if (ZERONET(src_ip)) { + src_in->sin_family = dst_in->sin_family; + src_in->sin_addr.s_addr = dst_ip; + ret = copy_addr(addr, dev, dev->dev_addr); + } else if (LOOPBACK(src_ip)) { + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } else { + ret = rdma_translate_ip((struct sockaddr *)src_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } + + dev_put(dev); + return ret; +} + +int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, + struct rdma_dev_addr *addr, int timeout_ms, + void (*callback)(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context), + void *context) +{ + struct sockaddr_in *src_in, *dst_in; + struct addr_req *req; + int ret = 0; + + req = kmalloc(sizeof *req, GFP_KERNEL); + if (!req) + return -ENOMEM; + memset(req, 0, sizeof *req); + + if (src_addr) + memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); + memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); + req->addr = addr; + req->callback = callback; + req->context = context; + + src_in = (struct sockaddr_in *) &req->src_addr; + dst_in = (struct sockaddr_in *) &req->dst_addr; + + req->status = addr_resolve_local(src_in, dst_in, addr); + if (req->status == -EADDRNOTAVAIL) + req->status = addr_resolve_remote(src_in, dst_in, addr); + + switch (req->status) { + case 0: + req->timeout = jiffies; + queue_req(req); + break; + case -ENODATA: + req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; + queue_req(req); + addr_send_arp(dst_in); + break; + default: + ret = req->status; + kfree(req); + break; + } + return ret; +} +EXPORT_SYMBOL(rdma_resolve_ip); + +void rdma_addr_cancel(struct rdma_dev_addr *addr) +{ + struct addr_req *req, *temp_req; + + mutex_lock(&lock); + list_for_each_entry_safe(req, temp_req, &req_list, list) { + if (req->addr == addr) { + req->status = -ECANCELED; + req->timeout = jiffies; + list_del(&req->list); + list_add(&req->list, &req_list); + set_timeout(req->timeout); + break; + } + } + mutex_unlock(&lock); +} +EXPORT_SYMBOL(rdma_addr_cancel); + +static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct net_device *orig_dev) +{ + struct arphdr *arp_hdr; + + arp_hdr = (struct arphdr *) skb->nh.raw; + + if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || + arp_hdr->ar_op == htons(ARPOP_REPLY)) + set_timeout(jiffies); + + kfree_skb(skb); + return 0; +} + +static struct packet_type addr_arp = { + .type = __constant_htons(ETH_P_ARP), + .func = addr_arp_recv, + .af_packet_priv = (void*) 1, +}; + +static int addr_init(void) +{ + addr_wq = create_singlethread_workqueue("ib_addr_wq"); + if (!addr_wq) + return -ENOMEM; + + dev_add_pack(&addr_arp); + return 0; +} + +static void addr_cleanup(void) +{ + dev_remove_pack(&addr_arp); + destroy_workqueue(addr_wq); +} + +module_init(addr_init); +module_exit(addr_cleanup); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h new file mode 100644 index 000000000000..fcb5ba87dcc5 --- /dev/null +++ b/include/rdma/ib_addr.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + * + */ + +#if !defined(IB_ADDR_H) +#define IB_ADDR_H + +#include +#include +#include +#include +#include + +struct rdma_dev_addr { + unsigned char src_dev_addr[MAX_ADDR_LEN]; + unsigned char dst_dev_addr[MAX_ADDR_LEN]; + unsigned char broadcast[MAX_ADDR_LEN]; + enum ib_node_type dev_type; +}; + +/** + * rdma_translate_ip - Translate a local IP address to an RDMA hardware + * address. + */ +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr); + +/** + * rdma_resolve_ip - Resolve source and destination IP addresses to + * RDMA hardware addresses. + * @src_addr: An optional source address to use in the resolution. If a + * source address is not provided, a usable address will be returned via + * the callback. + * @dst_addr: The destination address to resolve. + * @addr: A reference to a data location that will receive the resolved + * addresses. The data location must remain valid until the callback has + * been invoked. + * @timeout_ms: Amount of time to wait for the address resolution to complete. + * @callback: Call invoked once address resolution has completed, timed out, + * or been canceled. A status of 0 indicates success. + * @context: User-specified context associated with the call. + */ +int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, + struct rdma_dev_addr *addr, int timeout_ms, + void (*callback)(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context), + void *context); + +void rdma_addr_cancel(struct rdma_dev_addr *addr); + +static inline int ip_addr_size(struct sockaddr *addr) +{ + return addr->sa_family == AF_INET6 ? + sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in); +} + +static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) +{ + return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; +} + +static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey) +{ + dev_addr->broadcast[8] = pkey >> 8; + dev_addr->broadcast[9] = (unsigned char) pkey; +} + +static inline union ib_gid *ib_addr_get_sgid(struct rdma_dev_addr *dev_addr) +{ + return (union ib_gid *) (dev_addr->src_dev_addr + 4); +} + +static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid); +} + +static inline union ib_gid *ib_addr_get_dgid(struct rdma_dev_addr *dev_addr) +{ + return (union ib_gid *) (dev_addr->dst_dev_addr + 4); +} + +static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid); +} + +#endif /* IB_ADDR_H */ -- cgit v1.2.3 From e51060f08a61965c4dd91516d82fe90617152590 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Sat, 17 Jun 2006 20:37:29 -0700 Subject: IB: IP address based RDMA connection manager Kernel connection management agent over InfiniBand that connects based on IP addresses. The agent defines a generic RDMA connection abstraction to support clients wanting to connect over different RDMA devices. The agent also handles RDMA device hotplug events on behalf of clients. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/Makefile | 6 +- drivers/infiniband/core/cma.c | 1927 ++++++++++++++++++++++++++++++++++++++ include/rdma/rdma_cm.h | 256 +++++ include/rdma/rdma_cm_ib.h | 47 + 4 files changed, 2234 insertions(+), 2 deletions(-) create mode 100644 drivers/infiniband/core/cma.c create mode 100644 include/rdma/rdma_cm.h create mode 100644 include/rdma/rdma_cm_ib.h (limited to 'include') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 05095919d168..68e73ec2d1f8 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,7 +1,7 @@ -addr_trans-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o +infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o $(addr_trans-y) + ib_cm.o $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o @@ -14,6 +14,8 @@ ib_sa-y := sa_query.o ib_cm-y := cm.o +rdma_cm-y := cma.o + ib_addr-y := addr.o ib_umad-y := user_mad.o diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c new file mode 100644 index 000000000000..a76834edf608 --- /dev/null +++ b/drivers/infiniband/core/cma.c @@ -0,0 +1,1927 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. + * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + * + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("Generic RDMA CM Agent"); +MODULE_LICENSE("Dual BSD/GPL"); + +#define CMA_CM_RESPONSE_TIMEOUT 20 +#define CMA_MAX_CM_RETRIES 3 + +static void cma_add_one(struct ib_device *device); +static void cma_remove_one(struct ib_device *device); + +static struct ib_client cma_client = { + .name = "cma", + .add = cma_add_one, + .remove = cma_remove_one +}; + +static LIST_HEAD(dev_list); +static LIST_HEAD(listen_any_list); +static DEFINE_MUTEX(lock); +static struct workqueue_struct *cma_wq; +static DEFINE_IDR(sdp_ps); +static DEFINE_IDR(tcp_ps); + +struct cma_device { + struct list_head list; + struct ib_device *device; + __be64 node_guid; + struct completion comp; + atomic_t refcount; + struct list_head id_list; +}; + +enum cma_state { + CMA_IDLE, + CMA_ADDR_QUERY, + CMA_ADDR_RESOLVED, + CMA_ROUTE_QUERY, + CMA_ROUTE_RESOLVED, + CMA_CONNECT, + CMA_DISCONNECT, + CMA_ADDR_BOUND, + CMA_LISTEN, + CMA_DEVICE_REMOVAL, + CMA_DESTROYING +}; + +struct rdma_bind_list { + struct idr *ps; + struct hlist_head owners; + unsigned short port; +}; + +/* + * Device removal can occur at anytime, so we need extra handling to + * serialize notifying the user of device removal with other callbacks. + * We do this by disabling removal notification while a callback is in process, + * and reporting it after the callback completes. + */ +struct rdma_id_private { + struct rdma_cm_id id; + + struct rdma_bind_list *bind_list; + struct hlist_node node; + struct list_head list; + struct list_head listen_list; + struct cma_device *cma_dev; + + enum cma_state state; + spinlock_t lock; + struct completion comp; + atomic_t refcount; + wait_queue_head_t wait_remove; + atomic_t dev_remove; + + int backlog; + int timeout_ms; + struct ib_sa_query *query; + int query_id; + union { + struct ib_cm_id *ib; + } cm_id; + + u32 seq_num; + u32 qp_num; + enum ib_qp_type qp_type; + u8 srq; +}; + +struct cma_work { + struct work_struct work; + struct rdma_id_private *id; + enum cma_state old_state; + enum cma_state new_state; + struct rdma_cm_event event; +}; + +union cma_ip_addr { + struct in6_addr ip6; + struct { + __u32 pad[3]; + __u32 addr; + } ip4; +}; + +struct cma_hdr { + u8 cma_version; + u8 ip_version; /* IP version: 7:4 */ + __u16 port; + union cma_ip_addr src_addr; + union cma_ip_addr dst_addr; +}; + +struct sdp_hh { + u8 bsdh[16]; + u8 sdp_version; /* Major version: 7:4 */ + u8 ip_version; /* IP version: 7:4 */ + u8 sdp_specific1[10]; + __u16 port; + __u16 sdp_specific2; + union cma_ip_addr src_addr; + union cma_ip_addr dst_addr; +}; + +struct sdp_hah { + u8 bsdh[16]; + u8 sdp_version; +}; + +#define CMA_VERSION 0x00 +#define SDP_MAJ_VERSION 0x2 + +static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&id_priv->lock, flags); + ret = (id_priv->state == comp); + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} + +static int cma_comp_exch(struct rdma_id_private *id_priv, + enum cma_state comp, enum cma_state exch) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&id_priv->lock, flags); + if ((ret = (id_priv->state == comp))) + id_priv->state = exch; + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} + +static enum cma_state cma_exch(struct rdma_id_private *id_priv, + enum cma_state exch) +{ + unsigned long flags; + enum cma_state old; + + spin_lock_irqsave(&id_priv->lock, flags); + old = id_priv->state; + id_priv->state = exch; + spin_unlock_irqrestore(&id_priv->lock, flags); + return old; +} + +static inline u8 cma_get_ip_ver(struct cma_hdr *hdr) +{ + return hdr->ip_version >> 4; +} + +static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) +{ + hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); +} + +static inline u8 sdp_get_majv(u8 sdp_version) +{ + return sdp_version >> 4; +} + +static inline u8 sdp_get_ip_ver(struct sdp_hh *hh) +{ + return hh->ip_version >> 4; +} + +static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) +{ + hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); +} + +static void cma_attach_to_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) +{ + atomic_inc(&cma_dev->refcount); + id_priv->cma_dev = cma_dev; + id_priv->id.device = cma_dev->device; + list_add_tail(&id_priv->list, &cma_dev->id_list); +} + +static inline void cma_deref_dev(struct cma_device *cma_dev) +{ + if (atomic_dec_and_test(&cma_dev->refcount)) + complete(&cma_dev->comp); +} + +static void cma_detach_from_dev(struct rdma_id_private *id_priv) +{ + list_del(&id_priv->list); + cma_deref_dev(id_priv->cma_dev); + id_priv->cma_dev = NULL; +} + +static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev; + union ib_gid *gid; + int ret = -ENODEV; + + gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr); + + mutex_lock(&lock); + list_for_each_entry(cma_dev, &dev_list, list) { + ret = ib_find_cached_gid(cma_dev->device, gid, + &id_priv->id.port_num, NULL); + if (!ret) { + cma_attach_to_dev(id_priv, cma_dev); + break; + } + } + mutex_unlock(&lock); + return ret; +} + +static int cma_acquire_dev(struct rdma_id_private *id_priv) +{ + switch (id_priv->id.route.addr.dev_addr.dev_type) { + case IB_NODE_CA: + return cma_acquire_ib_dev(id_priv); + default: + return -ENODEV; + } +} + +static void cma_deref_id(struct rdma_id_private *id_priv) +{ + if (atomic_dec_and_test(&id_priv->refcount)) + complete(&id_priv->comp); +} + +static void cma_release_remove(struct rdma_id_private *id_priv) +{ + if (atomic_dec_and_test(&id_priv->dev_remove)) + wake_up(&id_priv->wait_remove); +} + +struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, + void *context, enum rdma_port_space ps) +{ + struct rdma_id_private *id_priv; + + id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); + if (!id_priv) + return ERR_PTR(-ENOMEM); + + id_priv->state = CMA_IDLE; + id_priv->id.context = context; + id_priv->id.event_handler = event_handler; + id_priv->id.ps = ps; + spin_lock_init(&id_priv->lock); + init_completion(&id_priv->comp); + atomic_set(&id_priv->refcount, 1); + init_waitqueue_head(&id_priv->wait_remove); + atomic_set(&id_priv->dev_remove, 0); + INIT_LIST_HEAD(&id_priv->listen_list); + get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); + + return &id_priv->id; +} +EXPORT_SYMBOL(rdma_create_id); + +static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + struct rdma_dev_addr *dev_addr; + int ret; + + dev_addr = &id_priv->id.route.addr.dev_addr; + ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, + ib_addr_get_pkey(dev_addr), + &qp_attr.pkey_index); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr.port_num = id_priv->id.port_num; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | IB_QP_PORT); +} + +int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) +{ + struct rdma_id_private *id_priv; + struct ib_qp *qp; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id->device != pd->device) + return -EINVAL; + + qp = ib_create_qp(pd, qp_init_attr); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + switch (id->device->node_type) { + case IB_NODE_CA: + ret = cma_init_ib_qp(id_priv, qp); + break; + default: + ret = -ENOSYS; + break; + } + + if (ret) + goto err; + + id->qp = qp; + id_priv->qp_num = qp->qp_num; + id_priv->qp_type = qp->qp_type; + id_priv->srq = (qp->srq != NULL); + return 0; +err: + ib_destroy_qp(qp); + return ret; +} +EXPORT_SYMBOL(rdma_create_qp); + +void rdma_destroy_qp(struct rdma_cm_id *id) +{ + ib_destroy_qp(id->qp); +} +EXPORT_SYMBOL(rdma_destroy_qp); + +static int cma_modify_qp_rtr(struct rdma_cm_id *id) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + if (!id->qp) + return 0; + + /* Need to update QP attributes from default values. */ + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_RTR; + ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); +} + +static int cma_modify_qp_rts(struct rdma_cm_id *id) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + if (!id->qp) + return 0; + + qp_attr.qp_state = IB_QPS_RTS; + ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); +} + +static int cma_modify_qp_err(struct rdma_cm_id *id) +{ + struct ib_qp_attr qp_attr; + + if (!id->qp) + return 0; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); +} + +int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + switch (id_priv->id.device->node_type) { + case IB_NODE_CA: + ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, + qp_attr_mask); + if (qp_attr->qp_state == IB_QPS_RTR) + qp_attr->rq_psn = id_priv->seq_num; + break; + default: + ret = -ENOSYS; + break; + } + + return ret; +} +EXPORT_SYMBOL(rdma_init_qp_attr); + +static inline int cma_zero_addr(struct sockaddr *addr) +{ + struct in6_addr *ip6; + + if (addr->sa_family == AF_INET) + return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr); + else { + ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; + return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | + ip6->s6_addr32[3] | ip6->s6_addr32[4]) == 0; + } +} + +static inline int cma_loopback_addr(struct sockaddr *addr) +{ + return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr); +} + +static inline int cma_any_addr(struct sockaddr *addr) +{ + return cma_zero_addr(addr) || cma_loopback_addr(addr); +} + +static inline int cma_any_port(struct sockaddr *addr) +{ + return !((struct sockaddr_in *) addr)->sin_port; +} + +static int cma_get_net_info(void *hdr, enum rdma_port_space ps, + u8 *ip_ver, __u16 *port, + union cma_ip_addr **src, union cma_ip_addr **dst) +{ + switch (ps) { + case RDMA_PS_SDP: + if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) != + SDP_MAJ_VERSION) + return -EINVAL; + + *ip_ver = sdp_get_ip_ver(hdr); + *port = ((struct sdp_hh *) hdr)->port; + *src = &((struct sdp_hh *) hdr)->src_addr; + *dst = &((struct sdp_hh *) hdr)->dst_addr; + break; + default: + if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION) + return -EINVAL; + + *ip_ver = cma_get_ip_ver(hdr); + *port = ((struct cma_hdr *) hdr)->port; + *src = &((struct cma_hdr *) hdr)->src_addr; + *dst = &((struct cma_hdr *) hdr)->dst_addr; + break; + } + + if (*ip_ver != 4 && *ip_ver != 6) + return -EINVAL; + return 0; +} + +static void cma_save_net_info(struct rdma_addr *addr, + struct rdma_addr *listen_addr, + u8 ip_ver, __u16 port, + union cma_ip_addr *src, union cma_ip_addr *dst) +{ + struct sockaddr_in *listen4, *ip4; + struct sockaddr_in6 *listen6, *ip6; + + switch (ip_ver) { + case 4: + listen4 = (struct sockaddr_in *) &listen_addr->src_addr; + ip4 = (struct sockaddr_in *) &addr->src_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = dst->ip4.addr; + ip4->sin_port = listen4->sin_port; + + ip4 = (struct sockaddr_in *) &addr->dst_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = src->ip4.addr; + ip4->sin_port = port; + break; + case 6: + listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr; + ip6 = (struct sockaddr_in6 *) &addr->src_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = dst->ip6; + ip6->sin6_port = listen6->sin6_port; + + ip6 = (struct sockaddr_in6 *) &addr->dst_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = src->ip6; + ip6->sin6_port = port; + break; + default: + break; + } +} + +static inline int cma_user_data_offset(enum rdma_port_space ps) +{ + switch (ps) { + case RDMA_PS_SDP: + return 0; + default: + return sizeof(struct cma_hdr); + } +} + +static int cma_notify_user(struct rdma_id_private *id_priv, + enum rdma_cm_event_type type, int status, + void *data, u8 data_len) +{ + struct rdma_cm_event event; + + event.event = type; + event.status = status; + event.private_data = data; + event.private_data_len = data_len; + + return id_priv->id.event_handler(&id_priv->id, &event); +} + +static void cma_cancel_route(struct rdma_id_private *id_priv) +{ + switch (id_priv->id.device->node_type) { + case IB_NODE_CA: + if (id_priv->query) + ib_sa_cancel_query(id_priv->query_id, id_priv->query); + break; + default: + break; + } +} + +static inline int cma_internal_listen(struct rdma_id_private *id_priv) +{ + return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev && + cma_any_addr(&id_priv->id.route.addr.src_addr); +} + +static void cma_destroy_listen(struct rdma_id_private *id_priv) +{ + cma_exch(id_priv, CMA_DESTROYING); + + if (id_priv->cma_dev) { + switch (id_priv->id.device->node_type) { + case IB_NODE_CA: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + ib_destroy_cm_id(id_priv->cm_id.ib); + break; + default: + break; + } + cma_detach_from_dev(id_priv); + } + list_del(&id_priv->listen_list); + + cma_deref_id(id_priv); + wait_for_completion(&id_priv->comp); + + kfree(id_priv); +} + +static void cma_cancel_listens(struct rdma_id_private *id_priv) +{ + struct rdma_id_private *dev_id_priv; + + mutex_lock(&lock); + list_del(&id_priv->list); + + while (!list_empty(&id_priv->listen_list)) { + dev_id_priv = list_entry(id_priv->listen_list.next, + struct rdma_id_private, listen_list); + cma_destroy_listen(dev_id_priv); + } + mutex_unlock(&lock); +} + +static void cma_cancel_operation(struct rdma_id_private *id_priv, + enum cma_state state) +{ + switch (state) { + case CMA_ADDR_QUERY: + rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); + break; + case CMA_ROUTE_QUERY: + cma_cancel_route(id_priv); + break; + case CMA_LISTEN: + if (cma_any_addr(&id_priv->id.route.addr.src_addr) && + !id_priv->cma_dev) + cma_cancel_listens(id_priv); + break; + default: + break; + } +} + +static void cma_release_port(struct rdma_id_private *id_priv) +{ + struct rdma_bind_list *bind_list = id_priv->bind_list; + + if (!bind_list) + return; + + mutex_lock(&lock); + hlist_del(&id_priv->node); + if (hlist_empty(&bind_list->owners)) { + idr_remove(bind_list->ps, bind_list->port); + kfree(bind_list); + } + mutex_unlock(&lock); +} + +void rdma_destroy_id(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv; + enum cma_state state; + + id_priv = container_of(id, struct rdma_id_private, id); + state = cma_exch(id_priv, CMA_DESTROYING); + cma_cancel_operation(id_priv, state); + + if (id_priv->cma_dev) { + switch (id->device->node_type) { + case IB_NODE_CA: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + ib_destroy_cm_id(id_priv->cm_id.ib); + break; + default: + break; + } + mutex_lock(&lock); + cma_detach_from_dev(id_priv); + mutex_unlock(&lock); + } + + cma_release_port(id_priv); + cma_deref_id(id_priv); + wait_for_completion(&id_priv->comp); + + kfree(id_priv->id.route.path_rec); + kfree(id_priv); +} +EXPORT_SYMBOL(rdma_destroy_id); + +static int cma_rep_recv(struct rdma_id_private *id_priv) +{ + int ret; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + goto reject; + + ret = cma_modify_qp_rts(&id_priv->id); + if (ret) + goto reject; + + ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(&id_priv->id); + ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + return ret; +} + +static int cma_verify_rep(struct rdma_id_private *id_priv, void *data) +{ + if (id_priv->id.ps == RDMA_PS_SDP && + sdp_get_majv(((struct sdp_hah *) data)->sdp_version) != + SDP_MAJ_VERSION) + return -EINVAL; + + return 0; +} + +static int cma_rtu_recv(struct rdma_id_private *id_priv) +{ + int ret; + + ret = cma_modify_qp_rts(&id_priv->id); + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(&id_priv->id); + ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + return ret; +} + +static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv = cm_id->context; + enum rdma_cm_event_type event; + u8 private_data_len = 0; + int ret = 0, status = 0; + + atomic_inc(&id_priv->dev_remove); + if (!cma_comp(id_priv, CMA_CONNECT)) + goto out; + + switch (ib_event->event) { + case IB_CM_REQ_ERROR: + case IB_CM_REP_ERROR: + event = RDMA_CM_EVENT_UNREACHABLE; + status = -ETIMEDOUT; + break; + case IB_CM_REP_RECEIVED: + status = cma_verify_rep(id_priv, ib_event->private_data); + if (status) + event = RDMA_CM_EVENT_CONNECT_ERROR; + else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) { + status = cma_rep_recv(id_priv); + event = status ? RDMA_CM_EVENT_CONNECT_ERROR : + RDMA_CM_EVENT_ESTABLISHED; + } else + event = RDMA_CM_EVENT_CONNECT_RESPONSE; + private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; + break; + case IB_CM_RTU_RECEIVED: + status = cma_rtu_recv(id_priv); + event = status ? RDMA_CM_EVENT_CONNECT_ERROR : + RDMA_CM_EVENT_ESTABLISHED; + break; + case IB_CM_DREQ_ERROR: + status = -ETIMEDOUT; /* fall through */ + case IB_CM_DREQ_RECEIVED: + case IB_CM_DREP_RECEIVED: + if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT)) + goto out; + event = RDMA_CM_EVENT_DISCONNECTED; + break; + case IB_CM_TIMEWAIT_EXIT: + case IB_CM_MRA_RECEIVED: + /* ignore event */ + goto out; + case IB_CM_REJ_RECEIVED: + cma_modify_qp_err(&id_priv->id); + status = ib_event->param.rej_rcvd.reason; + event = RDMA_CM_EVENT_REJECTED; + break; + default: + printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", + ib_event->event); + goto out; + } + + ret = cma_notify_user(id_priv, event, status, ib_event->private_data, + private_data_len); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.ib = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } +out: + cma_release_remove(id_priv); + return ret; +} + +static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv; + struct rdma_cm_id *id; + struct rdma_route *rt; + union cma_ip_addr *src, *dst; + __u16 port; + u8 ip_ver; + + id = rdma_create_id(listen_id->event_handler, listen_id->context, + listen_id->ps); + if (IS_ERR(id)) + return NULL; + + rt = &id->route; + rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; + rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); + if (!rt->path_rec) + goto err; + + if (cma_get_net_info(ib_event->private_data, listen_id->ps, + &ip_ver, &port, &src, &dst)) + goto err; + + cma_save_net_info(&id->route.addr, &listen_id->route.addr, + ip_ver, port, src, dst); + rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; + if (rt->num_paths == 2) + rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; + + ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); + ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); + ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); + rt->addr.dev_addr.dev_type = IB_NODE_CA; + + id_priv = container_of(id, struct rdma_id_private, id); + id_priv->state = CMA_CONNECT; + return id_priv; +err: + rdma_destroy_id(id); + return NULL; +} + +static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +{ + struct rdma_id_private *listen_id, *conn_id; + int offset, ret; + + listen_id = cm_id->context; + atomic_inc(&listen_id->dev_remove); + if (!cma_comp(listen_id, CMA_LISTEN)) { + ret = -ECONNABORTED; + goto out; + } + + conn_id = cma_new_id(&listen_id->id, ib_event); + if (!conn_id) { + ret = -ENOMEM; + goto out; + } + + atomic_inc(&conn_id->dev_remove); + ret = cma_acquire_ib_dev(conn_id); + if (ret) { + ret = -ENODEV; + cma_release_remove(conn_id); + rdma_destroy_id(&conn_id->id); + goto out; + } + + conn_id->cm_id.ib = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_ib_handler; + + offset = cma_user_data_offset(listen_id->id.ps); + ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0, + ib_event->private_data + offset, + IB_CM_REQ_PRIVATE_DATA_SIZE - offset); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + conn_id->cm_id.ib = NULL; + cma_exch(conn_id, CMA_DESTROYING); + cma_release_remove(conn_id); + rdma_destroy_id(&conn_id->id); + } +out: + cma_release_remove(listen_id); + return ret; +} + +static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) +{ + return cpu_to_be64(((u64)ps << 16) + + be16_to_cpu(((struct sockaddr_in *) addr)->sin_port)); +} + +static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, + struct ib_cm_compare_data *compare) +{ + struct cma_hdr *cma_data, *cma_mask; + struct sdp_hh *sdp_data, *sdp_mask; + __u32 ip4_addr; + struct in6_addr ip6_addr; + + memset(compare, 0, sizeof *compare); + cma_data = (void *) compare->data; + cma_mask = (void *) compare->mask; + sdp_data = (void *) compare->data; + sdp_mask = (void *) compare->mask; + + switch (addr->sa_family) { + case AF_INET: + ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; + if (ps == RDMA_PS_SDP) { + sdp_set_ip_ver(sdp_data, 4); + sdp_set_ip_ver(sdp_mask, 0xF); + sdp_data->dst_addr.ip4.addr = ip4_addr; + sdp_mask->dst_addr.ip4.addr = ~0; + } else { + cma_set_ip_ver(cma_data, 4); + cma_set_ip_ver(cma_mask, 0xF); + cma_data->dst_addr.ip4.addr = ip4_addr; + cma_mask->dst_addr.ip4.addr = ~0; + } + break; + case AF_INET6: + ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; + if (ps == RDMA_PS_SDP) { + sdp_set_ip_ver(sdp_data, 6); + sdp_set_ip_ver(sdp_mask, 0xF); + sdp_data->dst_addr.ip6 = ip6_addr; + memset(&sdp_mask->dst_addr.ip6, 0xFF, + sizeof sdp_mask->dst_addr.ip6); + } else { + cma_set_ip_ver(cma_data, 6); + cma_set_ip_ver(cma_mask, 0xF); + cma_data->dst_addr.ip6 = ip6_addr; + memset(&cma_mask->dst_addr.ip6, 0xFF, + sizeof cma_mask->dst_addr.ip6); + } + break; + default: + break; + } +} + +static int cma_ib_listen(struct rdma_id_private *id_priv) +{ + struct ib_cm_compare_data compare_data; + struct sockaddr *addr; + __be64 svc_id; + int ret; + + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.ib)) + return PTR_ERR(id_priv->cm_id.ib); + + addr = &id_priv->id.route.addr.src_addr; + svc_id = cma_get_service_id(id_priv->id.ps, addr); + if (cma_any_addr(addr)) + ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); + else { + cma_set_compare_data(id_priv->id.ps, addr, &compare_data); + ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data); + } + + if (ret) { + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; + } + + return ret; +} + +static int cma_listen_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event) +{ + struct rdma_id_private *id_priv = id->context; + + id->context = id_priv->id.context; + id->event_handler = id_priv->id.event_handler; + return id_priv->id.event_handler(id, event); +} + +static void cma_listen_on_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) +{ + struct rdma_id_private *dev_id_priv; + struct rdma_cm_id *id; + int ret; + + id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps); + if (IS_ERR(id)) + return; + + dev_id_priv = container_of(id, struct rdma_id_private, id); + + dev_id_priv->state = CMA_ADDR_BOUND; + memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, + ip_addr_size(&id_priv->id.route.addr.src_addr)); + + cma_attach_to_dev(dev_id_priv, cma_dev); + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + + ret = rdma_listen(id, id_priv->backlog); + if (ret) + goto err; + + return; +err: + cma_destroy_listen(dev_id_priv); +} + +static void cma_listen_on_all(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev; + + mutex_lock(&lock); + list_add_tail(&id_priv->list, &listen_any_list); + list_for_each_entry(cma_dev, &dev_list, list) + cma_listen_on_dev(id_priv, cma_dev); + mutex_unlock(&lock); +} + +static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af) +{ + struct sockaddr_in addr_in; + + memset(&addr_in, 0, sizeof addr_in); + addr_in.sin_family = af; + return rdma_bind_addr(id, (struct sockaddr *) &addr_in); +} + +int rdma_listen(struct rdma_cm_id *id, int backlog) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == CMA_IDLE) { + ret = cma_bind_any(id, AF_INET); + if (ret) + return ret; + } + + if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN)) + return -EINVAL; + + id_priv->backlog = backlog; + if (id->device) { + switch (id->device->node_type) { + case IB_NODE_CA: + ret = cma_ib_listen(id_priv); + if (ret) + goto err; + break; + default: + ret = -ENOSYS; + goto err; + } + } else + cma_listen_on_all(id_priv); + + return 0; +err: + id_priv->backlog = 0; + cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_listen); + +static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, + void *context) +{ + struct cma_work *work = context; + struct rdma_route *route; + + route = &work->id->id.route; + + if (!status) { + route->num_paths = 1; + *route->path_rec = *path_rec; + } else { + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + } + + queue_work(cma_wq, &work->work); +} + +static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, + struct cma_work *work) +{ + struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr; + struct ib_sa_path_rec path_rec; + + memset(&path_rec, 0, sizeof path_rec); + path_rec.sgid = *ib_addr_get_sgid(addr); + path_rec.dgid = *ib_addr_get_dgid(addr); + path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); + path_rec.numb_path = 1; + + id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device, + id_priv->id.port_num, &path_rec, + IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH, + timeout_ms, GFP_KERNEL, + cma_query_handler, work, &id_priv->query); + + return (id_priv->query_id < 0) ? id_priv->query_id : 0; +} + +static void cma_work_handler(void *data) +{ + struct cma_work *work = data; + struct rdma_id_private *id_priv = work->id; + int destroy = 0; + + atomic_inc(&id_priv->dev_remove); + if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) + goto out; + + if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + cma_exch(id_priv, CMA_DESTROYING); + destroy = 1; + } +out: + cma_release_remove(id_priv); + cma_deref_id(id_priv); + if (destroy) + rdma_destroy_id(&id_priv->id); + kfree(work); +} + +static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct rdma_route *route = &id_priv->id.route; + struct cma_work *work; + int ret; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler, work); + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + + route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); + if (!route->path_rec) { + ret = -ENOMEM; + goto err1; + } + + ret = cma_query_ib_route(id_priv, timeout_ms, work); + if (ret) + goto err2; + + return 0; +err2: + kfree(route->path_rec); + route->path_rec = NULL; +err1: + kfree(work); + return ret; +} + +int rdma_set_ib_paths(struct rdma_cm_id *id, + struct ib_sa_path_rec *path_rec, int num_paths) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED)) + return -EINVAL; + + id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL); + if (!id->route.path_rec) { + ret = -ENOMEM; + goto err; + } + + memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths); + return 0; +err: + cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED); + return ret; +} +EXPORT_SYMBOL(rdma_set_ib_paths); + +int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY)) + return -EINVAL; + + atomic_inc(&id_priv->refcount); + switch (id->device->node_type) { + case IB_NODE_CA: + ret = cma_resolve_ib_route(id_priv, timeout_ms); + break; + default: + ret = -ENOSYS; + break; + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED); + cma_deref_id(id_priv); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_route); + +static int cma_bind_loopback(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev; + struct ib_port_attr port_attr; + union ib_gid *gid; + u16 pkey; + int ret; + u8 p; + + mutex_lock(&lock); + list_for_each_entry(cma_dev, &dev_list, list) + for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p) + if (!ib_query_port (cma_dev->device, p, &port_attr) && + port_attr.state == IB_PORT_ACTIVE) + goto port_found; + + if (!list_empty(&dev_list)) { + p = 1; + cma_dev = list_entry(dev_list.next, struct cma_device, list); + } else { + ret = -ENODEV; + goto out; + } + +port_found: + gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr); + ret = ib_get_cached_gid(cma_dev->device, p, 0, gid); + if (ret) + goto out; + + ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); + if (ret) + goto out; + + ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); + id_priv->id.port_num = p; + cma_attach_to_dev(id_priv, cma_dev); +out: + mutex_unlock(&lock); + return ret; +} + +static void addr_handler(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *dev_addr, void *context) +{ + struct rdma_id_private *id_priv = context; + enum rdma_cm_event_type event; + + atomic_inc(&id_priv->dev_remove); + if (!id_priv->cma_dev && !status) + status = cma_acquire_dev(id_priv); + + if (status) { + if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND)) + goto out; + event = RDMA_CM_EVENT_ADDR_ERROR; + } else { + if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) + goto out; + memcpy(&id_priv->id.route.addr.src_addr, src_addr, + ip_addr_size(src_addr)); + event = RDMA_CM_EVENT_ADDR_RESOLVED; + } + + if (cma_notify_user(id_priv, event, status, NULL, 0)) { + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + cma_deref_id(id_priv); + rdma_destroy_id(&id_priv->id); + return; + } +out: + cma_release_remove(id_priv); + cma_deref_id(id_priv); +} + +static int cma_resolve_loopback(struct rdma_id_private *id_priv) +{ + struct cma_work *work; + struct sockaddr_in *src_in, *dst_in; + int ret; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + if (!id_priv->cma_dev) { + ret = cma_bind_loopback(id_priv); + if (ret) + goto err; + } + + ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, + ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr)); + + if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) { + src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; + dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr; + src_in->sin_family = dst_in->sin_family; + src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr; + } + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler, work); + work->old_state = CMA_ADDR_QUERY; + work->new_state = CMA_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +err: + kfree(work); + return ret; +} + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr) +{ + if (src_addr && src_addr->sa_family) + return rdma_bind_addr(id, src_addr); + else + return cma_bind_any(id, dst_addr->sa_family); +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr, int timeout_ms) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == CMA_IDLE) { + ret = cma_bind_addr(id, src_addr, dst_addr); + if (ret) + return ret; + } + + if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY)) + return -EINVAL; + + atomic_inc(&id_priv->refcount); + memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); + if (cma_any_addr(dst_addr)) + ret = cma_resolve_loopback(id_priv); + else + ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr, + &id->route.addr.dev_addr, + timeout_ms, addr_handler, id_priv); + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND); + cma_deref_id(id_priv); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_addr); + +static void cma_bind_port(struct rdma_bind_list *bind_list, + struct rdma_id_private *id_priv) +{ + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + sin->sin_port = htons(bind_list->port); + id_priv->bind_list = bind_list; + hlist_add_head(&id_priv->node, &bind_list->owners); +} + +static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv, + unsigned short snum) +{ + struct rdma_bind_list *bind_list; + int port, start, ret; + + bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); + if (!bind_list) + return -ENOMEM; + + start = snum ? snum : sysctl_local_port_range[0]; + + do { + ret = idr_get_new_above(ps, bind_list, start, &port); + } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); + + if (ret) + goto err; + + if ((snum && port != snum) || + (!snum && port > sysctl_local_port_range[1])) { + idr_remove(ps, port); + ret = -EADDRNOTAVAIL; + goto err; + } + + bind_list->ps = ps; + bind_list->port = (unsigned short) port; + cma_bind_port(bind_list, id_priv); + return 0; +err: + kfree(bind_list); + return ret; +} + +static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) +{ + struct rdma_id_private *cur_id; + struct sockaddr_in *sin, *cur_sin; + struct rdma_bind_list *bind_list; + struct hlist_node *node; + unsigned short snum; + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + snum = ntohs(sin->sin_port); + if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + bind_list = idr_find(ps, snum); + if (!bind_list) + return cma_alloc_port(ps, id_priv, snum); + + /* + * We don't support binding to any address if anyone is bound to + * a specific address on the same port. + */ + if (cma_any_addr(&id_priv->id.route.addr.src_addr)) + return -EADDRNOTAVAIL; + + hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { + if (cma_any_addr(&cur_id->id.route.addr.src_addr)) + return -EADDRNOTAVAIL; + + cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr; + if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr) + return -EADDRINUSE; + } + + cma_bind_port(bind_list, id_priv); + return 0; +} + +static int cma_get_port(struct rdma_id_private *id_priv) +{ + struct idr *ps; + int ret; + + switch (id_priv->id.ps) { + case RDMA_PS_SDP: + ps = &sdp_ps; + break; + case RDMA_PS_TCP: + ps = &tcp_ps; + break; + default: + return -EPROTONOSUPPORT; + } + + mutex_lock(&lock); + if (cma_any_port(&id_priv->id.route.addr.src_addr)) + ret = cma_alloc_port(ps, id_priv, 0); + else + ret = cma_use_port(ps, id_priv); + mutex_unlock(&lock); + + return ret; +} + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv; + int ret; + + if (addr->sa_family != AF_INET) + return -EAFNOSUPPORT; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) + return -EINVAL; + + if (!cma_any_addr(addr)) { + ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); + if (!ret) + ret = cma_acquire_dev(id_priv); + if (ret) + goto err; + } + + memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); + ret = cma_get_port(id_priv); + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE); + return ret; +} +EXPORT_SYMBOL(rdma_bind_addr); + +static int cma_format_hdr(void *hdr, enum rdma_port_space ps, + struct rdma_route *route) +{ + struct sockaddr_in *src4, *dst4; + struct cma_hdr *cma_hdr; + struct sdp_hh *sdp_hdr; + + src4 = (struct sockaddr_in *) &route->addr.src_addr; + dst4 = (struct sockaddr_in *) &route->addr.dst_addr; + + switch (ps) { + case RDMA_PS_SDP: + sdp_hdr = hdr; + if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) + return -EINVAL; + sdp_set_ip_ver(sdp_hdr, 4); + sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + sdp_hdr->port = src4->sin_port; + break; + default: + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + cma_set_ip_ver(cma_hdr, 4); + cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + cma_hdr->port = src4->sin_port; + break; + } + return 0; +} + +static int cma_connect_ib(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_req_param req; + struct rdma_route *route; + void *private_data; + int offset, ret; + + memset(&req, 0, sizeof req); + offset = cma_user_data_offset(id_priv->id.ps); + req.private_data_len = offset + conn_param->private_data_len; + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); + if (!private_data) + return -ENOMEM; + + if (conn_param->private_data && conn_param->private_data_len) + memcpy(private_data + offset, conn_param->private_data, + conn_param->private_data_len); + + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.ib)) { + ret = PTR_ERR(id_priv->cm_id.ib); + goto out; + } + + route = &id_priv->id.route; + ret = cma_format_hdr(private_data, id_priv->id.ps, route); + if (ret) + goto out; + req.private_data = private_data; + + req.primary_path = &route->path_rec[0]; + if (route->num_paths == 2) + req.alternate_path = &route->path_rec[1]; + + req.service_id = cma_get_service_id(id_priv->id.ps, + &route->addr.dst_addr); + req.qp_num = id_priv->qp_num; + req.qp_type = id_priv->qp_type; + req.starting_psn = id_priv->seq_num; + req.responder_resources = conn_param->responder_resources; + req.initiator_depth = conn_param->initiator_depth; + req.flow_control = conn_param->flow_control; + req.retry_count = conn_param->retry_count; + req.rnr_retry_count = conn_param->rnr_retry_count; + req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; + req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; + req.max_cm_retries = CMA_MAX_CM_RETRIES; + req.srq = id_priv->srq ? 1 : 0; + + ret = ib_send_cm_req(id_priv->cm_id.ib, &req); +out: + kfree(private_data); + return ret; +} + +int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT)) + return -EINVAL; + + if (!id->qp) { + id_priv->qp_num = conn_param->qp_num; + id_priv->qp_type = conn_param->qp_type; + id_priv->srq = conn_param->srq; + } + + switch (id->device->node_type) { + case IB_NODE_CA: + ret = cma_connect_ib(id_priv, conn_param); + break; + default: + ret = -ENOSYS; + break; + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED); + return ret; +} +EXPORT_SYMBOL(rdma_connect); + +static int cma_accept_ib(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_rep_param rep; + int ret; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + return ret; + + memset(&rep, 0, sizeof rep); + rep.qp_num = id_priv->qp_num; + rep.starting_psn = id_priv->seq_num; + rep.private_data = conn_param->private_data; + rep.private_data_len = conn_param->private_data_len; + rep.responder_resources = conn_param->responder_resources; + rep.initiator_depth = conn_param->initiator_depth; + rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT; + rep.failover_accepted = 0; + rep.flow_control = conn_param->flow_control; + rep.rnr_retry_count = conn_param->rnr_retry_count; + rep.srq = id_priv->srq ? 1 : 0; + + return ib_send_cm_rep(id_priv->cm_id.ib, &rep); +} + +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_CONNECT)) + return -EINVAL; + + if (!id->qp && conn_param) { + id_priv->qp_num = conn_param->qp_num; + id_priv->qp_type = conn_param->qp_type; + id_priv->srq = conn_param->srq; + } + + switch (id->device->node_type) { + case IB_NODE_CA: + if (conn_param) + ret = cma_accept_ib(id_priv, conn_param); + else + ret = cma_rep_recv(id_priv); + break; + default: + ret = -ENOSYS; + break; + } + + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(id); + rdma_reject(id, NULL, 0); + return ret; +} +EXPORT_SYMBOL(rdma_accept); + +int rdma_reject(struct rdma_cm_id *id, const void *private_data, + u8 private_data_len) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_CONNECT)) + return -EINVAL; + + switch (id->device->node_type) { + case IB_NODE_CA: + ret = ib_send_cm_rej(id_priv->cm_id.ib, + IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, + private_data, private_data_len); + break; + default: + ret = -ENOSYS; + break; + } + return ret; +} +EXPORT_SYMBOL(rdma_reject); + +int rdma_disconnect(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_CONNECT) && + !cma_comp(id_priv, CMA_DISCONNECT)) + return -EINVAL; + + ret = cma_modify_qp_err(id); + if (ret) + goto out; + + switch (id->device->node_type) { + case IB_NODE_CA: + /* Initiate or respond to a disconnect. */ + if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) + ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); + break; + default: + break; + } +out: + return ret; +} +EXPORT_SYMBOL(rdma_disconnect); + +static void cma_add_one(struct ib_device *device) +{ + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + + cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); + if (!cma_dev) + return; + + cma_dev->device = device; + cma_dev->node_guid = device->node_guid; + if (!cma_dev->node_guid) + goto err; + + init_completion(&cma_dev->comp); + atomic_set(&cma_dev->refcount, 1); + INIT_LIST_HEAD(&cma_dev->id_list); + ib_set_client_data(device, &cma_client, cma_dev); + + mutex_lock(&lock); + list_add_tail(&cma_dev->list, &dev_list); + list_for_each_entry(id_priv, &listen_any_list, list) + cma_listen_on_dev(id_priv, cma_dev); + mutex_unlock(&lock); + return; +err: + kfree(cma_dev); +} + +static int cma_remove_id_dev(struct rdma_id_private *id_priv) +{ + enum cma_state state; + + /* Record that we want to remove the device */ + state = cma_exch(id_priv, CMA_DEVICE_REMOVAL); + if (state == CMA_DESTROYING) + return 0; + + cma_cancel_operation(id_priv, state); + wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove)); + + /* Check for destruction from another callback. */ + if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) + return 0; + + return cma_notify_user(id_priv, RDMA_CM_EVENT_DEVICE_REMOVAL, + 0, NULL, 0); +} + +static void cma_process_remove(struct cma_device *cma_dev) +{ + struct list_head remove_list; + struct rdma_id_private *id_priv; + int ret; + + INIT_LIST_HEAD(&remove_list); + + mutex_lock(&lock); + while (!list_empty(&cma_dev->id_list)) { + id_priv = list_entry(cma_dev->id_list.next, + struct rdma_id_private, list); + + if (cma_internal_listen(id_priv)) { + cma_destroy_listen(id_priv); + continue; + } + + list_del(&id_priv->list); + list_add_tail(&id_priv->list, &remove_list); + atomic_inc(&id_priv->refcount); + mutex_unlock(&lock); + + ret = cma_remove_id_dev(id_priv); + cma_deref_id(id_priv); + if (ret) + rdma_destroy_id(&id_priv->id); + + mutex_lock(&lock); + } + mutex_unlock(&lock); + + cma_deref_dev(cma_dev); + wait_for_completion(&cma_dev->comp); +} + +static void cma_remove_one(struct ib_device *device) +{ + struct cma_device *cma_dev; + + cma_dev = ib_get_client_data(device, &cma_client); + if (!cma_dev) + return; + + mutex_lock(&lock); + list_del(&cma_dev->list); + mutex_unlock(&lock); + + cma_process_remove(cma_dev); + kfree(cma_dev); +} + +static int cma_init(void) +{ + int ret; + + cma_wq = create_singlethread_workqueue("rdma_cm_wq"); + if (!cma_wq) + return -ENOMEM; + + ret = ib_register_client(&cma_client); + if (ret) + goto err; + return 0; + +err: + destroy_workqueue(cma_wq); + return ret; +} + +static void cma_cleanup(void) +{ + ib_unregister_client(&cma_client); + destroy_workqueue(cma_wq); + idr_destroy(&sdp_ps); + idr_destroy(&tcp_ps); +} + +module_init(cma_init); +module_exit(cma_cleanup); diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h new file mode 100644 index 000000000000..402c63d7226b --- /dev/null +++ b/include/rdma/rdma_cm.h @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + * + */ + +#if !defined(RDMA_CM_H) +#define RDMA_CM_H + +#include +#include +#include +#include + +/* + * Upon receiving a device removal event, users must destroy the associated + * RDMA identifier and release all resources allocated with the device. + */ +enum rdma_cm_event_type { + RDMA_CM_EVENT_ADDR_RESOLVED, + RDMA_CM_EVENT_ADDR_ERROR, + RDMA_CM_EVENT_ROUTE_RESOLVED, + RDMA_CM_EVENT_ROUTE_ERROR, + RDMA_CM_EVENT_CONNECT_REQUEST, + RDMA_CM_EVENT_CONNECT_RESPONSE, + RDMA_CM_EVENT_CONNECT_ERROR, + RDMA_CM_EVENT_UNREACHABLE, + RDMA_CM_EVENT_REJECTED, + RDMA_CM_EVENT_ESTABLISHED, + RDMA_CM_EVENT_DISCONNECTED, + RDMA_CM_EVENT_DEVICE_REMOVAL, +}; + +enum rdma_port_space { + RDMA_PS_SDP = 0x0001, + RDMA_PS_TCP = 0x0106, + RDMA_PS_UDP = 0x0111, + RDMA_PS_SCTP = 0x0183 +}; + +struct rdma_addr { + struct sockaddr src_addr; + u8 src_pad[sizeof(struct sockaddr_in6) - + sizeof(struct sockaddr)]; + struct sockaddr dst_addr; + u8 dst_pad[sizeof(struct sockaddr_in6) - + sizeof(struct sockaddr)]; + struct rdma_dev_addr dev_addr; +}; + +struct rdma_route { + struct rdma_addr addr; + struct ib_sa_path_rec *path_rec; + int num_paths; +}; + +struct rdma_cm_event { + enum rdma_cm_event_type event; + int status; + void *private_data; + u8 private_data_len; +}; + +struct rdma_cm_id; + +/** + * rdma_cm_event_handler - Callback used to report user events. + * + * Notes: Users may not call rdma_destroy_id from this callback to destroy + * the passed in id, or a corresponding listen id. Returning a + * non-zero value from the callback will destroy the passed in id. + */ +typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id, + struct rdma_cm_event *event); + +struct rdma_cm_id { + struct ib_device *device; + void *context; + struct ib_qp *qp; + rdma_cm_event_handler event_handler; + struct rdma_route route; + enum rdma_port_space ps; + u8 port_num; +}; + +/** + * rdma_create_id - Create an RDMA identifier. + * + * @event_handler: User callback invoked to report events associated with the + * returned rdma_id. + * @context: User specified context associated with the id. + * @ps: RDMA port space. + */ +struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, + void *context, enum rdma_port_space ps); + +void rdma_destroy_id(struct rdma_cm_id *id); + +/** + * rdma_bind_addr - Bind an RDMA identifier to a source address and + * associated RDMA device, if needed. + * + * @id: RDMA identifier. + * @addr: Local address information. Wildcard values are permitted. + * + * This associates a source address with the RDMA identifier before calling + * rdma_listen. If a specific local address is given, the RDMA identifier will + * be bound to a local RDMA device. + */ +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr); + +/** + * rdma_resolve_addr - Resolve destination and optional source addresses + * from IP addresses to an RDMA address. If successful, the specified + * rdma_cm_id will be bound to a local device. + * + * @id: RDMA identifier. + * @src_addr: Source address information. This parameter may be NULL. + * @dst_addr: Destination address information. + * @timeout_ms: Time to wait for resolution to complete. + */ +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr, int timeout_ms); + +/** + * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier + * into route information needed to establish a connection. + * + * This is called on the client side of a connection. + * Users must have first called rdma_resolve_addr to resolve a dst_addr + * into an RDMA address before calling this routine. + */ +int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms); + +/** + * rdma_create_qp - Allocate a QP and associate it with the specified RDMA + * identifier. + * + * QPs allocated to an rdma_cm_id will automatically be transitioned by the CMA + * through their states. + */ +int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr); + +/** + * rdma_destroy_qp - Deallocate the QP associated with the specified RDMA + * identifier. + * + * Users must destroy any QP associated with an RDMA identifier before + * destroying the RDMA ID. + */ +void rdma_destroy_qp(struct rdma_cm_id *id); + +/** + * rdma_init_qp_attr - Initializes the QP attributes for use in transitioning + * to a specified QP state. + * @id: Communication identifier associated with the QP attributes to + * initialize. + * @qp_attr: On input, specifies the desired QP state. On output, the + * mandatory and desired optional attributes will be set in order to + * modify the QP to the specified state. + * @qp_attr_mask: The QP attribute mask that may be used to transition the + * QP to the specified state. + * + * Users must set the @qp_attr->qp_state to the desired QP state. This call + * will set all required attributes for the given transition, along with + * known optional attributes. Users may override the attributes returned from + * this call before calling ib_modify_qp. + * + * Users that wish to have their QP automatically transitioned through its + * states can associate a QP with the rdma_cm_id by calling rdma_create_qp(). + */ +int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, + int *qp_attr_mask); + +struct rdma_conn_param { + const void *private_data; + u8 private_data_len; + u8 responder_resources; + u8 initiator_depth; + u8 flow_control; + u8 retry_count; /* ignored when accepting */ + u8 rnr_retry_count; + /* Fields below ignored if a QP is created on the rdma_cm_id. */ + u8 srq; + u32 qp_num; + enum ib_qp_type qp_type; +}; + +/** + * rdma_connect - Initiate an active connection request. + * + * Users must have resolved a route for the rdma_cm_id to connect with + * by having called rdma_resolve_route before calling this routine. + */ +int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); + +/** + * rdma_listen - This function is called by the passive side to + * listen for incoming connection requests. + * + * Users must have bound the rdma_cm_id to a local address by calling + * rdma_bind_addr before calling this routine. + */ +int rdma_listen(struct rdma_cm_id *id, int backlog); + +/** + * rdma_accept - Called to accept a connection request or response. + * @id: Connection identifier associated with the request. + * @conn_param: Information needed to establish the connection. This must be + * provided if accepting a connection request. If accepting a connection + * response, this parameter must be NULL. + * + * Typically, this routine is only called by the listener to accept a connection + * request. It must also be called on the active side of a connection if the + * user is performing their own QP transitions. + */ +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); + +/** + * rdma_reject - Called to reject a connection request or response. + */ +int rdma_reject(struct rdma_cm_id *id, const void *private_data, + u8 private_data_len); + +/** + * rdma_disconnect - This function disconnects the associated QP and + * transitions it into the error state. + */ +int rdma_disconnect(struct rdma_cm_id *id); + +#endif /* RDMA_CM_H */ + diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h new file mode 100644 index 000000000000..e8c3af1804d4 --- /dev/null +++ b/include/rdma/rdma_cm_ib.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2006 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + * + */ + +#if !defined(RDMA_CM_IB_H) +#define RDMA_CM_IB_H + +#include + +/** + * rdma_set_ib_paths - Manually sets the path records used to establish a + * connection. + * @id: Connection identifier associated with the request. + * @path_rec: Reference to the path record + * + * This call permits a user to specify routing information for rdma_cm_id's + * bound to Infiniband devices. It is called on the client side of a + * connection and replaces the call to rdma_resolve_route. + */ +int rdma_set_ib_paths(struct rdma_cm_id *id, + struct ib_sa_path_rec *path_rec, int num_paths); + +#endif /* RDMA_CM_IB_H */ -- cgit v1.2.3 From 6fb9cdbf2cdb2ea187e57ec2e16cc59df2adf86a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sat, 17 Jun 2006 20:37:34 -0700 Subject: IB: Add caching of ports' LMC Add an LMC cache to struct ib_device, and add a function ib_get_cached_lmc() to query the cache. Signed-off-by: Jack Morgenstein Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cache.c | 30 +++++++++++++++++++++++++++++- include/rdma/ib_cache.h | 13 +++++++++++++ include/rdma/ib_verbs.h | 1 + 3 files changed, 43 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 50364c0b090c..e05ca2cdc73f 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -191,6 +191,24 @@ int ib_find_cached_pkey(struct ib_device *device, } EXPORT_SYMBOL(ib_find_cached_pkey); +int ib_get_cached_lmc(struct ib_device *device, + u8 port_num, + u8 *lmc) +{ + unsigned long flags; + int ret = 0; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, flags); + *lmc = device->cache.lmc_cache[port_num - start_port(device)]; + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_get_cached_lmc); + static void ib_cache_update(struct ib_device *device, u8 port) { @@ -251,6 +269,8 @@ static void ib_cache_update(struct ib_device *device, device->cache.pkey_cache[port - start_port(device)] = pkey_cache; device->cache.gid_cache [port - start_port(device)] = gid_cache; + device->cache.lmc_cache[port - start_port(device)] = tprops->lmc; + write_unlock_irq(&device->cache.lock); kfree(old_pkey_cache); @@ -305,7 +325,13 @@ static void ib_cache_setup_one(struct ib_device *device) kmalloc(sizeof *device->cache.gid_cache * (end_port(device) - start_port(device) + 1), GFP_KERNEL); - if (!device->cache.pkey_cache || !device->cache.gid_cache) { + device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * + (end_port(device) - + start_port(device) + 1), + GFP_KERNEL); + + if (!device->cache.pkey_cache || !device->cache.gid_cache || + !device->cache.lmc_cache) { printk(KERN_WARNING "Couldn't allocate cache " "for %s\n", device->name); goto err; @@ -333,6 +359,7 @@ err_cache: err: kfree(device->cache.pkey_cache); kfree(device->cache.gid_cache); + kfree(device->cache.lmc_cache); } static void ib_cache_cleanup_one(struct ib_device *device) @@ -349,6 +376,7 @@ static void ib_cache_cleanup_one(struct ib_device *device) kfree(device->cache.pkey_cache); kfree(device->cache.gid_cache); + kfree(device->cache.lmc_cache); } static struct ib_client cache_client = { diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 5bf9834f7dca..f179d233ffc3 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -102,4 +102,17 @@ int ib_find_cached_pkey(struct ib_device *device, u16 pkey, u16 *index); +/** + * ib_get_cached_lmc - Returns a cached lmc table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @lmc: The lmc value for the specified port for that device. + * + * ib_get_cached_lmc() fetches the specified lmc table entry stored in + * the local software cache. + */ +int ib_get_cached_lmc(struct ib_device *device, + u8 port_num, + u8 *lmc); + #endif /* _IB_CACHE_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6bbf1b364400..aeb4fcd86a9e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -827,6 +827,7 @@ struct ib_cache { struct ib_event_handler event_handler; struct ib_pkey_cache **pkey_cache; struct ib_gid_cache **gid_cache; + u8 *lmc_cache; }; struct ib_device { -- cgit v1.2.3 From 63942c9a981ecfa2aabfb27ff1a87b88f2ee9f5b Mon Sep 17 00:00:00 2001 From: Leonid Arsh Date: Sat, 17 Jun 2006 20:37:35 -0700 Subject: IB: Add client reregister event type Add IB_EVENT_CLIENT_REREGISTER to enum so low-level drivers can generate "client reregister" events. Signed-off-by: Leonid Arsh Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index aeb4fcd86a9e..10a6268c2cc2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -260,7 +260,8 @@ enum ib_event_type { IB_EVENT_SM_CHANGE, IB_EVENT_SRQ_ERR, IB_EVENT_SRQ_LIMIT_REACHED, - IB_EVENT_QP_LAST_WQE_REACHED + IB_EVENT_QP_LAST_WQE_REACHED, + IB_EVENT_CLIENT_REREGISTER }; struct ib_event { -- cgit v1.2.3 From da2ab62ab5e430e6ffafc2d0e6046dcd2780f570 Mon Sep 17 00:00:00 2001 From: Leonid Arsh Date: Sat, 17 Jun 2006 20:37:36 -0700 Subject: IB: Move struct port_info from ipath to Move ipath's struct port_info into , so that it can be used by mthca to implement client reregister support. Remove the __attribute__((packed)) because all the members of the struct are naturally aligned anyway. Signed-off-by: Leonid Arsh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_mad.c | 40 ++------------------------------- include/rdma/ib_smi.h | 36 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index f7f8391fe43f..49acd1e15707 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -137,47 +137,11 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp, return reply(smp); } -struct port_info { - __be64 mkey; - __be64 gid_prefix; - __be16 lid; - __be16 sm_lid; - __be32 cap_mask; - __be16 diag_code; - __be16 mkey_lease_period; - u8 local_port_num; - u8 link_width_enabled; - u8 link_width_supported; - u8 link_width_active; - u8 linkspeed_portstate; /* 4 bits, 4 bits */ - u8 portphysstate_linkdown; /* 4 bits, 4 bits */ - u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */ - u8 linkspeedactive_enabled; /* 4 bits, 4 bits */ - u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */ - u8 vlcap_inittype; /* 4 bits, 4 bits */ - u8 vl_high_limit; - u8 vl_arb_high_cap; - u8 vl_arb_low_cap; - u8 inittypereply_mtucap; /* 4 bits, 4 bits */ - u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */ - u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */ - __be16 mkey_violations; - __be16 pkey_violations; - __be16 qkey_violations; - u8 guid_cap; - u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */ - u8 resv_resptimevalue; /* 3 bits, 5 bits */ - u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */ - __be16 max_credit_hint; - u8 resv; - u8 link_roundtrip_latency[3]; -} __attribute__ ((packed)); - static int recv_subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, u8 port) { struct ipath_ibdev *dev; - struct port_info *pip = (struct port_info *)smp->data; + struct ib_port_info *pip = (struct ib_port_info *)smp->data; u16 lid; u8 ibcstat; u8 mtu; @@ -312,7 +276,7 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp, static int recv_subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, u8 port) { - struct port_info *pip = (struct port_info *)smp->data; + struct ib_port_info *pip = (struct ib_port_info *)smp->data; struct ib_event event; struct ipath_ibdev *dev; u32 flags; diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index 87f60737f695..f29af135ba83 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -85,6 +85,42 @@ struct ib_smp { #define IB_SMP_ATTR_LED_INFO __constant_htons(0x0031) #define IB_SMP_ATTR_VENDOR_MASK __constant_htons(0xFF00) +struct ib_port_info { + __be64 mkey; + __be64 gid_prefix; + __be16 lid; + __be16 sm_lid; + __be32 cap_mask; + __be16 diag_code; + __be16 mkey_lease_period; + u8 local_port_num; + u8 link_width_enabled; + u8 link_width_supported; + u8 link_width_active; + u8 linkspeed_portstate; /* 4 bits, 4 bits */ + u8 portphysstate_linkdown; /* 4 bits, 4 bits */ + u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */ + u8 linkspeedactive_enabled; /* 4 bits, 4 bits */ + u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */ + u8 vlcap_inittype; /* 4 bits, 4 bits */ + u8 vl_high_limit; + u8 vl_arb_high_cap; + u8 vl_arb_low_cap; + u8 inittypereply_mtucap; /* 4 bits, 4 bits */ + u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */ + u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */ + __be16 mkey_violations; + __be16 pkey_violations; + __be16 qkey_violations; + u8 guid_cap; + u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */ + u8 resv_resptimevalue; /* 3 bits, 5 bits */ + u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */ + __be16 max_credit_hint; + u8 resv; + u8 link_roundtrip_latency[3]; +}; + static inline u8 ib_get_smp_direction(struct ib_smp *smp) { -- cgit v1.2.3 From 73c0996b1ca60338fa50e42acfcebd32b7636a8b Mon Sep 17 00:00:00 2001 From: Ramachandra K Date: Sat, 17 Jun 2006 20:37:38 -0700 Subject: [SCSI] srp.h: Add I/O Class values Add enum values for I/O Class values from rev. 10 and rev. 16a SRP drafts. The values are used to detect targets that implement obsolete revisions of SRP, so that the initiator can use the old format for port identifier when connecting to them. Signed-off-by: Ramachandra K Signed-off-by: Roland Dreier --- include/scsi/srp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/scsi/srp.h b/include/scsi/srp.h index 637f77eccf0c..ad178fa78f66 100644 --- a/include/scsi/srp.h +++ b/include/scsi/srp.h @@ -87,6 +87,11 @@ enum srp_login_rej_reason { SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED = 0x00010006 }; +enum { + SRP_REV10_IB_IO_CLASS = 0xff00, + SRP_REV16A_IB_IO_CLASS = 0x0100 +}; + struct srp_direct_buf { __be64 va; __be32 key; -- cgit v1.2.3 From 75af9088514432ef0c1052ba3767ceb0beb6f101 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Sat, 17 Jun 2006 20:37:39 -0700 Subject: IB/ucm: Get rid of duplicate P_Key parameter The P_Key is provided into a SIDR REQ in two places, once as a parameter, and again in the path record. Remove the P_Key as a parameter and always use the one given in the path record. This change has no practical effect on ABI functionality. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 2 +- drivers/infiniband/core/ucm.c | 1 - include/rdma/ib_cm.h | 3 +-- include/rdma/ib_user_cm.h | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1c7463b7c5fd..629ed26a0176 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -2615,7 +2615,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); sidr_req_msg->request_id = cm_id_priv->id.local_id; - sidr_req_msg->pkey = cpu_to_be16(param->pkey); + sidr_req_msg->pkey = cpu_to_be16(param->path->pkey); sidr_req_msg->service_id = param->service_id; if (param->private_data && param->private_data_len) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 67caf36504ee..c1c6fda9452c 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1044,7 +1044,6 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, param.service_id = cmd.sid; param.timeout_ms = cmd.timeout; param.max_cm_retries = cmd.max_cm_retries; - param.pkey = cmd.pkey; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 8f394f035684..c9b4738be9d6 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -546,7 +546,6 @@ struct ib_cm_sidr_req_param { const void *private_data; u8 private_data_len; u8 max_cm_retries; - u16 pkey; }; /** @@ -570,7 +569,7 @@ struct ib_cm_sidr_rep_param { }; /** - * ib_send_cm_sidr_rep - Sends a service ID resolution request to the + * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the * remote node. * @cm_id: Communication identifier associated with the received service ID * resolution request. diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h index a9e1b22d245c..066c20b7cdfb 100644 --- a/include/rdma/ib_user_cm.h +++ b/include/rdma/ib_user_cm.h @@ -200,7 +200,7 @@ struct ib_ucm_sidr_req { __be64 sid; __u64 data; __u64 path; - __u16 pkey; + __u16 reserved_pkey; __u8 len; __u8 max_cm_retries; __u8 reserved[4]; -- cgit v1.2.3 From 4e00d69454a8747798de11dc4eeef1edeee5ce98 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Sat, 17 Jun 2006 20:37:39 -0700 Subject: IB: Add ib_init_ah_from_wc() Add a function to initialize address handle attributes from a work completion. This functionality is duplicated by both verbs and the CM. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 44 ++++++++++++++++++++++++++--------------- include/rdma/ib_verbs.h | 14 +++++++++++++ 2 files changed, 42 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b78e7dc69330..468999c38803 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -125,35 +125,47 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); -struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, - struct ib_grh *grh, u8 port_num) +int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, + struct ib_grh *grh, struct ib_ah_attr *ah_attr) { - struct ib_ah_attr ah_attr; u32 flow_class; u16 gid_index; int ret; - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = wc->slid; - ah_attr.sl = wc->sl; - ah_attr.src_path_bits = wc->dlid_path_bits; - ah_attr.port_num = port_num; + memset(ah_attr, 0, sizeof *ah_attr); + ah_attr->dlid = wc->slid; + ah_attr->sl = wc->sl; + ah_attr->src_path_bits = wc->dlid_path_bits; + ah_attr->port_num = port_num; if (wc->wc_flags & IB_WC_GRH) { - ah_attr.ah_flags = IB_AH_GRH; - ah_attr.grh.dgid = grh->sgid; + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.dgid = grh->sgid; - ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num, + ret = ib_find_cached_gid(device, &grh->dgid, &port_num, &gid_index); if (ret) - return ERR_PTR(ret); + return ret; - ah_attr.grh.sgid_index = (u8) gid_index; + ah_attr->grh.sgid_index = (u8) gid_index; flow_class = be32_to_cpu(grh->version_tclass_flow); - ah_attr.grh.flow_label = flow_class & 0xFFFFF; - ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF; - ah_attr.grh.hop_limit = grh->hop_limit; + ah_attr->grh.flow_label = flow_class & 0xFFFFF; + ah_attr->grh.hop_limit = grh->hop_limit; + ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF; } + return 0; +} +EXPORT_SYMBOL(ib_init_ah_from_wc); + +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, + struct ib_grh *grh, u8 port_num) +{ + struct ib_ah_attr ah_attr; + int ret; + + ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr); + if (ret) + return ERR_PTR(ret); return ib_create_ah(pd, &ah_attr); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 10a6268c2cc2..7ced208edacf 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1087,6 +1087,20 @@ int ib_dealloc_pd(struct ib_pd *pd); */ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +/** + * ib_init_ah_from_wc - Initializes address handle attributes from a + * work completion. + * @device: Device on which the received message arrived. + * @port_num: Port on which the received message arrived. + * @wc: Work completion associated with the received message. + * @grh: References the received global route header. This parameter is + * ignored unless the work completion indicates that the GRH is valid. + * @ah_attr: Returned attributes that can be used when creating an address + * handle for replying to the message. + */ +int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, + struct ib_grh *grh, struct ib_ah_attr *ah_attr); + /** * ib_create_ah_from_wc - Creates an address handle associated with the * sender of the specified work completion. -- cgit v1.2.3 From 6d969a471ba107d94cf03dab3c69f45b9733f500 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Sat, 17 Jun 2006 20:37:39 -0700 Subject: IB/sa: Add ib_init_ah_from_path() Add a call to initialize address handle attributes given a path record. This is used by the CM, and would be useful for users of UD QPs. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 31 +++++++++++++++++++++++++++++++ include/rdma/ib_sa.h | 7 +++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 501cc054cb3b..08d9dd5e0487 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -47,6 +47,7 @@ #include #include +#include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand subnet administration query support"); @@ -441,6 +442,36 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) } EXPORT_SYMBOL(ib_sa_cancel_query); +int ib_init_ah_from_path(struct ib_device *device, u8 port_num, + struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) +{ + int ret; + u16 gid_index; + + memset(ah_attr, 0, sizeof *ah_attr); + ah_attr->dlid = be16_to_cpu(rec->dlid); + ah_attr->sl = rec->sl; + ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f; + ah_attr->port_num = port_num; + + if (rec->hop_limit > 1) { + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.dgid = rec->dgid; + + ret = ib_find_cached_gid(device, &rec->sgid, &port_num, + &gid_index); + if (ret) + return ret; + + ah_attr->grh.sgid_index = gid_index; + ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); + ah_attr->grh.hop_limit = rec->hop_limit; + ah_attr->grh.traffic_class = rec->traffic_class; + } + return 0; +} +EXPORT_SYMBOL(ib_init_ah_from_path); + static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) { unsigned long flags; diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index ad63c215efe5..c99e4420fd7e 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -370,5 +370,12 @@ ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, context, query); } +/** + * ib_init_ah_from_path - Initialize address handle attributes based on an SA + * path record. + */ +int ib_init_ah_from_path(struct ib_device *device, u8 port_num, + struct ib_sa_path_rec *rec, + struct ib_ah_attr *ah_attr); #endif /* IB_SA_H */ -- cgit v1.2.3 From 9ead190bfde2a434c74ea604382d08acb2eceef5 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sat, 17 Jun 2006 20:44:49 -0700 Subject: IB/uverbs: Don't serialize with ib_uverbs_idr_mutex Currently, all userspace verbs operations that call into the kernel are serialized by ib_uverbs_idr_mutex. This can be a scalability issue for some workloads, especially for devices driven by the ipath driver, which needs to call into the kernel even for datapath operations. Fix this by adding reference counts to the userspace objects, and then converting ib_uverbs_idr_mutex into a spinlock that only protects the idrs long enough to take a reference on the object being looked up. Because remove operations may fail, we have to do a slightly funky two-step deletion, which is described in the comments at the top of uverbs_cmd.c. This also still leaves ib_uverbs_idr_lock as a single lock that is possibly subject to contention. However, the lock hold time will only be a single idr operation, so multiple threads should still be able to make progress, even if ib_uverbs_idr_lock is being ping-ponged. Surprisingly, these changes even shrink the object code: add/remove: 23/5 grow/shrink: 4/21 up/down: 633/-693 (-60) Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 4 +- drivers/infiniband/core/uverbs_cmd.c | 890 ++++++++++++++++++++-------------- drivers/infiniband/core/uverbs_main.c | 35 +- include/rdma/ib_verbs.h | 4 + 4 files changed, 557 insertions(+), 376 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 3372d67ff139..bb9bee56a824 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -132,7 +132,7 @@ struct ib_ucq_object { u32 async_events_reported; }; -extern struct mutex ib_uverbs_idr_mutex; +extern spinlock_t ib_uverbs_idr_lock; extern struct idr ib_uverbs_pd_idr; extern struct idr ib_uverbs_mr_idr; extern struct idr ib_uverbs_mw_idr; @@ -141,6 +141,8 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); + struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, int is_async, int *fd); void ib_uverbs_release_event_file(struct kref *ref); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 403dd811ec7f..76bf61e9b552 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -50,7 +50,64 @@ (udata)->outlen = (olen); \ } while (0) -static int idr_add_uobj(struct idr *idr, void *obj, struct ib_uobject *uobj) +/* + * The ib_uobject locking scheme is as follows: + * + * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it + * needs to be held during all idr operations. When an object is + * looked up, a reference must be taken on the object's kref before + * dropping this lock. + * + * - Each object also has an rwsem. This rwsem must be held for + * reading while an operation that uses the object is performed. + * For example, while registering an MR, the associated PD's + * uobject.mutex must be held for reading. The rwsem must be held + * for writing while initializing or destroying an object. + * + * - In addition, each object has a "live" flag. If this flag is not + * set, then lookups of the object will fail even if it is found in + * the idr. This handles a reader that blocks and does not acquire + * the rwsem until after the object is destroyed. The destroy + * operation will set the live flag to 0 and then drop the rwsem; + * this will allow the reader to acquire the rwsem, see that the + * live flag is 0, and then drop the rwsem and its reference to + * object. The underlying storage will not be freed until the last + * reference to the object is dropped. + */ + +static void init_uobj(struct ib_uobject *uobj, u64 user_handle, + struct ib_ucontext *context) +{ + uobj->user_handle = user_handle; + uobj->context = context; + kref_init(&uobj->ref); + init_rwsem(&uobj->mutex); + uobj->live = 0; +} + +static void release_uobj(struct kref *kref) +{ + kfree(container_of(kref, struct ib_uobject, ref)); +} + +static void put_uobj(struct ib_uobject *uobj) +{ + kref_put(&uobj->ref, release_uobj); +} + +static void put_uobj_read(struct ib_uobject *uobj) +{ + up_read(&uobj->mutex); + put_uobj(uobj); +} + +static void put_uobj_write(struct ib_uobject *uobj) +{ + up_write(&uobj->mutex); + put_uobj(uobj); +} + +static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) { int ret; @@ -58,7 +115,9 @@ retry: if (!idr_pre_get(idr, GFP_KERNEL)) return -ENOMEM; + spin_lock(&ib_uverbs_idr_lock); ret = idr_get_new(idr, uobj, &uobj->id); + spin_unlock(&ib_uverbs_idr_lock); if (ret == -EAGAIN) goto retry; @@ -66,6 +125,121 @@ retry: return ret; } +void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) +{ + spin_lock(&ib_uverbs_idr_lock); + idr_remove(idr, uobj->id); + spin_unlock(&ib_uverbs_idr_lock); +} + +static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, + struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + spin_lock(&ib_uverbs_idr_lock); + uobj = idr_find(idr, id); + if (uobj) + kref_get(&uobj->ref); + spin_unlock(&ib_uverbs_idr_lock); + + return uobj; +} + +static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, + struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = __idr_get_uobj(idr, id, context); + if (!uobj) + return NULL; + + down_read(&uobj->mutex); + if (!uobj->live) { + put_uobj_read(uobj); + return NULL; + } + + return uobj; +} + +static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, + struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = __idr_get_uobj(idr, id, context); + if (!uobj) + return NULL; + + down_write(&uobj->mutex); + if (!uobj->live) { + put_uobj_write(uobj); + return NULL; + } + + return uobj; +} + +static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = idr_read_uobj(idr, id, context); + return uobj ? uobj->object : NULL; +} + +static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context); +} + +static void put_pd_read(struct ib_pd *pd) +{ + put_uobj_read(pd->uobject); +} + +static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context); +} + +static void put_cq_read(struct ib_cq *cq) +{ + put_uobj_read(cq->uobject); +} + +static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context); +} + +static void put_ah_read(struct ib_ah *ah) +{ + put_uobj_read(ah->uobject); +} + +static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context); +} + +static void put_qp_read(struct ib_qp *qp) +{ + put_uobj_read(qp->uobject); +} + +static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context); +} + +static void put_srq_read(struct ib_srq *srq) +{ + put_uobj_read(srq->uobject); +} + ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -296,7 +470,8 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - uobj->context = file->ucontext; + init_uobj(uobj, 0, file->ucontext); + down_write(&uobj->mutex); pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, file->ucontext, &udata); @@ -309,11 +484,10 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->uobject = uobj; atomic_set(&pd->usecnt, 0); - mutex_lock(&ib_uverbs_idr_mutex); - - ret = idr_add_uobj(&ib_uverbs_pd_idr, pd, uobj); + uobj->object = pd; + ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); if (ret) - goto err_up; + goto err_idr; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -321,26 +495,27 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->pd_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + uobj->live = 1; + + up_write(&uobj->mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_pd_idr, uobj->id); +err_copy: + idr_remove_uobj(&ib_uverbs_pd_idr, uobj); -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); +err_idr: ib_dealloc_pd(pd); err: - kfree(uobj); + put_uobj_write(uobj); return ret; } @@ -349,37 +524,34 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; - struct ib_pd *pd; struct ib_uobject *uobj; - int ret = -EINVAL; + int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); + uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); + if (!uobj) + return -EINVAL; - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - if (!pd || pd->uobject->context != file->ucontext) - goto out; + ret = ib_dealloc_pd(uobj->object); + if (!ret) + uobj->live = 0; - uobj = pd->uobject; + put_uobj_write(uobj); - ret = ib_dealloc_pd(pd); if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); + idr_remove_uobj(&ib_uverbs_pd_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); - kfree(uobj); + put_uobj(uobj); -out: - mutex_unlock(&ib_uverbs_idr_mutex); - - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, @@ -419,7 +591,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - obj->uobject.context = file->ucontext; + init_uobj(&obj->uobject, 0, file->ucontext); + down_write(&obj->uobject.mutex); /* * We ask for writable memory if any access flags other than @@ -436,23 +609,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, obj->umem.virt_base = cmd.hca_va; - mutex_lock(&ib_uverbs_idr_mutex); - - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - if (!pd || pd->uobject->context != file->ucontext) { - ret = -EINVAL; - goto err_up; - } - - if (!pd->device->reg_user_mr) { - ret = -ENOSYS; - goto err_up; - } + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) + goto err_release; mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); if (IS_ERR(mr)) { ret = PTR_ERR(mr); - goto err_up; + goto err_put; } mr->device = pd->device; @@ -461,43 +625,48 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); - memset(&resp, 0, sizeof resp); - resp.lkey = mr->lkey; - resp.rkey = mr->rkey; - - ret = idr_add_uobj(&ib_uverbs_mr_idr, mr, &obj->uobject); + obj->uobject.object = mr; + ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject); if (ret) goto err_unreg; + memset(&resp, 0, sizeof resp); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; resp.mr_handle = obj->uobject.id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } + put_pd_read(pd); + mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + obj->uobject.live = 1; + + up_write(&obj->uobject.mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); +err_copy: + idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject); err_unreg: ib_dereg_mr(mr); -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); +err_put: + put_pd_read(pd); +err_release: ib_umem_release(file->device->ib_dev, &obj->umem); err_free: - kfree(obj); + put_uobj_write(&obj->uobject); return ret; } @@ -507,37 +676,40 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, { struct ib_uverbs_dereg_mr cmd; struct ib_mr *mr; + struct ib_uobject *uobj; struct ib_umem_object *memobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle); - if (!mr || mr->uobject->context != file->ucontext) - goto out; + uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); + if (!uobj) + return -EINVAL; - memobj = container_of(mr->uobject, struct ib_umem_object, uobject); + memobj = container_of(uobj, struct ib_umem_object, uobject); + mr = uobj->object; ret = ib_dereg_mr(mr); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); mutex_lock(&file->mutex); - list_del(&memobj->uobject.list); + list_del(&uobj->list); mutex_unlock(&file->mutex); ib_umem_release(file->device->ib_dev, &memobj->umem); - kfree(memobj); -out: - mutex_unlock(&ib_uverbs_idr_mutex); + put_uobj(uobj); - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, @@ -576,7 +748,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_uverbs_create_cq cmd; struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; - struct ib_ucq_object *uobj; + struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; @@ -594,10 +766,13 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) return -ENOMEM; + init_uobj(&obj->uobject, cmd.user_handle, file->ucontext); + down_write(&obj->uobject.mutex); + if (cmd.comp_channel >= 0) { ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); if (!ev_file) { @@ -606,63 +781,64 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, } } - uobj->uobject.user_handle = cmd.user_handle; - uobj->uobject.context = file->ucontext; - uobj->uverbs_file = file; - uobj->comp_events_reported = 0; - uobj->async_events_reported = 0; - INIT_LIST_HEAD(&uobj->comp_list); - INIT_LIST_HEAD(&uobj->async_list); + obj->uverbs_file = file; + obj->comp_events_reported = 0; + obj->async_events_reported = 0; + INIT_LIST_HEAD(&obj->comp_list); + INIT_LIST_HEAD(&obj->async_list); cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, file->ucontext, &udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); - goto err; + goto err_file; } cq->device = file->device->ib_dev; - cq->uobject = &uobj->uobject; + cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); - mutex_lock(&ib_uverbs_idr_mutex); - - ret = idr_add_uobj(&ib_uverbs_cq_idr, cq, &uobj->uobject); + obj->uobject.object = cq; + ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); if (ret) - goto err_up; + goto err_free; memset(&resp, 0, sizeof resp); - resp.cq_handle = uobj->uobject.id; + resp.cq_handle = obj->uobject.id; resp.cqe = cq->cqe; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } mutex_lock(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); + list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + obj->uobject.live = 1; + + up_write(&obj->uobject.mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); +err_copy: + idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); + +err_free: ib_destroy_cq(cq); -err: +err_file: if (ev_file) - ib_uverbs_release_ucq(file, ev_file, uobj); - kfree(uobj); + ib_uverbs_release_ucq(file, ev_file, obj); + +err: + put_uobj_write(&obj->uobject); return ret; } @@ -683,11 +859,9 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - mutex_lock(&ib_uverbs_idr_mutex); - - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (!cq || cq->uobject->context != file->ucontext || !cq->device->resize_cq) - goto out; + cq = idr_read_cq(cmd.cq_handle, file->ucontext); + if (!cq) + return -EINVAL; ret = cq->device->resize_cq(cq, cmd.cqe, &udata); if (ret) @@ -701,7 +875,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, ret = -EFAULT; out: - mutex_unlock(&ib_uverbs_idr_mutex); + put_cq_read(cq); return ret ? ret : in_len; } @@ -712,6 +886,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp *resp; + struct ib_uobject *uobj; struct ib_cq *cq; struct ib_wc *wc; int ret = 0; @@ -732,15 +907,17 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, goto out_wc; } - mutex_lock(&ib_uverbs_idr_mutex); - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (!cq || cq->uobject->context != file->ucontext) { + uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); + if (!uobj) { ret = -EINVAL; goto out; } + cq = uobj->object; resp->count = ib_poll_cq(cq, cmd.ne, wc); + put_uobj_read(uobj); + for (i = 0; i < resp->count; i++) { resp->wc[i].wr_id = wc[i].wr_id; resp->wc[i].status = wc[i].status; @@ -762,7 +939,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, ret = -EFAULT; out: - mutex_unlock(&ib_uverbs_idr_mutex); kfree(resp); out_wc: @@ -775,22 +951,23 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_req_notify_cq cmd; + struct ib_uobject *uobj; struct ib_cq *cq; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (cq && cq->uobject->context == file->ucontext) { - ib_req_notify_cq(cq, cmd.solicited_only ? - IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - ret = in_len; - } - mutex_unlock(&ib_uverbs_idr_mutex); + uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); + if (!uobj) + return -EINVAL; + cq = uobj->object; - return ret; + ib_req_notify_cq(cq, cmd.solicited_only ? + IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); + + put_uobj_read(uobj); + + return in_len; } ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, @@ -799,52 +976,50 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, { struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; + struct ib_uobject *uobj; struct ib_cq *cq; - struct ib_ucq_object *uobj; + struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file; - u64 user_handle; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - memset(&resp, 0, sizeof resp); - - mutex_lock(&ib_uverbs_idr_mutex); + uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); + if (!uobj) + return -EINVAL; + cq = uobj->object; + ev_file = cq->cq_context; + obj = container_of(cq->uobject, struct ib_ucq_object, uobject); - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (!cq || cq->uobject->context != file->ucontext) - goto out; + ret = ib_destroy_cq(cq); + if (!ret) + uobj->live = 0; - user_handle = cq->uobject->user_handle; - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); - ev_file = cq->cq_context; + put_uobj_write(uobj); - ret = ib_destroy_cq(cq); if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); + idr_remove_uobj(&ib_uverbs_cq_idr, uobj); mutex_lock(&file->mutex); - list_del(&uobj->uobject.list); + list_del(&uobj->list); mutex_unlock(&file->mutex); - ib_uverbs_release_ucq(file, ev_file, uobj); + ib_uverbs_release_ucq(file, ev_file, obj); - resp.comp_events_reported = uobj->comp_events_reported; - resp.async_events_reported = uobj->async_events_reported; + memset(&resp, 0, sizeof resp); + resp.comp_events_reported = obj->comp_events_reported; + resp.async_events_reported = obj->async_events_reported; - kfree(uobj); + put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - ret = -EFAULT; - -out: - mutex_unlock(&ib_uverbs_idr_mutex); + return -EFAULT; - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, @@ -854,7 +1029,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_uverbs_create_qp cmd; struct ib_uverbs_create_qp_resp resp; struct ib_udata udata; - struct ib_uqp_object *uobj; + struct ib_uqp_object *obj; struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; @@ -872,23 +1047,21 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext); + down_write(&obj->uevent.uobject.mutex); - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle); - rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle); - srq = cmd.is_srq ? idr_find(&ib_uverbs_srq_idr, cmd.srq_handle) : NULL; + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext); + rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext); + srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; - if (!pd || pd->uobject->context != file->ucontext || - !scq || scq->uobject->context != file->ucontext || - !rcq || rcq->uobject->context != file->ucontext || - (cmd.is_srq && (!srq || srq->uobject->context != file->ucontext))) { + if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { ret = -EINVAL; - goto err_up; + goto err_put; } attr.event_handler = ib_uverbs_qp_event_handler; @@ -905,16 +1078,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.cap.max_recv_sge = cmd.max_recv_sge; attr.cap.max_inline_data = cmd.max_inline_data; - uobj->uevent.uobject.user_handle = cmd.user_handle; - uobj->uevent.uobject.context = file->ucontext; - uobj->uevent.events_reported = 0; - INIT_LIST_HEAD(&uobj->uevent.event_list); - INIT_LIST_HEAD(&uobj->mcast_list); + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); qp = pd->device->create_qp(pd, &attr, &udata); if (IS_ERR(qp)) { ret = PTR_ERR(qp); - goto err_up; + goto err_put; } qp->device = pd->device; @@ -922,7 +1093,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; - qp->uobject = &uobj->uevent.uobject; + qp->uobject = &obj->uevent.uobject; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; @@ -932,14 +1103,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (attr.srq) atomic_inc(&attr.srq->usecnt); - memset(&resp, 0, sizeof resp); - resp.qpn = qp->qp_num; - - ret = idr_add_uobj(&ib_uverbs_qp_idr, qp, &uobj->uevent.uobject); + obj->uevent.uobject.object = qp; + ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); if (ret) goto err_destroy; - resp.qp_handle = uobj->uevent.uobject.id; + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + resp.qp_handle = obj->uevent.uobject.id; resp.max_recv_sge = attr.cap.max_recv_sge; resp.max_send_sge = attr.cap.max_send_sge; resp.max_recv_wr = attr.cap.max_recv_wr; @@ -949,27 +1120,42 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } + put_pd_read(pd); + put_cq_read(scq); + put_cq_read(rcq); + if (srq) + put_srq_read(srq); + mutex_lock(&file->mutex); - list_add_tail(&uobj->uevent.uobject.list, &file->ucontext->qp_list); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + obj->uevent.uobject.live = 1; + + up_write(&obj->uevent.uobject.mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_qp_idr, uobj->uevent.uobject.id); +err_copy: + idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); err_destroy: ib_destroy_qp(qp); -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); - - kfree(uobj); +err_put: + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq) + put_cq_read(rcq); + if (srq) + put_srq_read(srq); + + put_uobj_write(&obj->uevent.uobject); return ret; } @@ -994,15 +1180,15 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (qp && qp->uobject->context == file->ucontext) - ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - else + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { ret = -EINVAL; + goto out; + } + + ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - mutex_unlock(&ib_uverbs_idr_mutex); + put_qp_read(qp); if (ret) goto out; @@ -1089,10 +1275,8 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, if (!attr) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) { + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { ret = -EINVAL; goto out; } @@ -1144,13 +1328,15 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; ret = ib_modify_qp(qp, attr, cmd.attr_mask); + + put_qp_read(qp); + if (ret) goto out; ret = in_len; out: - mutex_unlock(&ib_uverbs_idr_mutex); kfree(attr); return ret; @@ -1162,8 +1348,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, { struct ib_uverbs_destroy_qp cmd; struct ib_uverbs_destroy_qp_resp resp; + struct ib_uobject *uobj; struct ib_qp *qp; - struct ib_uqp_object *uobj; + struct ib_uqp_object *obj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1171,43 +1358,43 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) - goto out; - - uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); + if (!uobj) + return -EINVAL; + qp = uobj->object; + obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); - if (!list_empty(&uobj->mcast_list)) { - ret = -EBUSY; - goto out; + if (!list_empty(&obj->mcast_list)) { + put_uobj_write(uobj); + return -EBUSY; } ret = ib_destroy_qp(qp); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); mutex_lock(&file->mutex); - list_del(&uobj->uevent.uobject.list); + list_del(&uobj->list); mutex_unlock(&file->mutex); - ib_uverbs_release_uevent(file, &uobj->uevent); + ib_uverbs_release_uevent(file, &obj->uevent); - resp.events_reported = uobj->uevent.events_reported; + resp.events_reported = obj->uevent.events_reported; - kfree(uobj); + put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - ret = -EFAULT; - -out: - mutex_unlock(&ib_uverbs_idr_mutex); + return -EFAULT; - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, @@ -1220,6 +1407,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; struct ib_qp *qp; int i, sg_ind; + int is_ud; ssize_t ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1236,12 +1424,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (!user_wr) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) goto out; + is_ud = qp->qp_type == IB_QPT_UD; sg_ind = 0; last = NULL; for (i = 0; i < cmd.wr_count; ++i) { @@ -1249,12 +1436,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, buf + sizeof cmd + i * cmd.wqe_size, cmd.wqe_size)) { ret = -EFAULT; - goto out; + goto out_put; } if (user_wr->num_sge + sg_ind > cmd.sge_count) { ret = -EINVAL; - goto out; + goto out_put; } next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + @@ -1262,7 +1449,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, GFP_KERNEL); if (!next) { ret = -ENOMEM; - goto out; + goto out_put; } if (!last) @@ -1278,12 +1465,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, next->send_flags = user_wr->send_flags; next->imm_data = (__be32 __force) user_wr->imm_data; - if (qp->qp_type == IB_QPT_UD) { - next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, - user_wr->wr.ud.ah); + if (is_ud) { + next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, + file->ucontext); if (!next->wr.ud.ah) { ret = -EINVAL; - goto out; + goto out_put; } next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; @@ -1320,7 +1507,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, sg_ind * sizeof (struct ib_sge), next->num_sge * sizeof (struct ib_sge))) { ret = -EFAULT; - goto out; + goto out_put; } sg_ind += next->num_sge; } else @@ -1340,10 +1527,13 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, &resp, sizeof resp)) ret = -EFAULT; -out: - mutex_unlock(&ib_uverbs_idr_mutex); +out_put: + put_qp_read(qp); +out: while (wr) { + if (is_ud && wr->wr.ud.ah) + put_ah_read(wr->wr.ud.ah); next = wr->next; kfree(wr); wr = next; @@ -1458,14 +1648,15 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) goto out; resp.bad_wr = 0; ret = qp->device->post_recv(qp, wr, &bad_wr); + + put_qp_read(qp); + if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -1479,8 +1670,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, ret = -EFAULT; out: - mutex_unlock(&ib_uverbs_idr_mutex); - while (wr) { next = wr->next; kfree(wr); @@ -1509,14 +1698,15 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - mutex_lock(&ib_uverbs_idr_mutex); - - srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); - if (!srq || srq->uobject->context != file->ucontext) + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) goto out; resp.bad_wr = 0; ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + + put_srq_read(srq); + if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -1530,8 +1720,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, ret = -EFAULT; out: - mutex_unlock(&ib_uverbs_idr_mutex); - while (wr) { next = wr->next; kfree(wr); @@ -1563,17 +1751,15 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); + init_uobj(uobj, cmd.user_handle, file->ucontext); + down_write(&uobj->mutex); - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - if (!pd || pd->uobject->context != file->ucontext) { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { ret = -EINVAL; - goto err_up; + goto err; } - uobj->user_handle = cmd.user_handle; - uobj->context = file->ucontext; - attr.dlid = cmd.attr.dlid; attr.sl = cmd.attr.sl; attr.src_path_bits = cmd.attr.src_path_bits; @@ -1589,12 +1775,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, ah = ib_create_ah(pd, &attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); - goto err_up; + goto err; } - ah->uobject = uobj; + ah->uobject = uobj; + uobj->object = ah; - ret = idr_add_uobj(&ib_uverbs_ah_idr, ah, uobj); + ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); if (ret) goto err_destroy; @@ -1603,27 +1790,29 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } + put_pd_read(pd); + mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->ah_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + uobj->live = 1; + + up_write(&uobj->mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_ah_idr, uobj->id); +err_copy: + idr_remove_uobj(&ib_uverbs_ah_idr, uobj); err_destroy: ib_destroy_ah(ah); -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); - - kfree(uobj); +err: + put_uobj_write(uobj); return ret; } @@ -1633,35 +1822,34 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, struct ib_uverbs_destroy_ah cmd; struct ib_ah *ah; struct ib_uobject *uobj; - int ret = -EINVAL; + int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); + uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); + if (!uobj) + return -EINVAL; + ah = uobj->object; - ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle); - if (!ah || ah->uobject->context != file->ucontext) - goto out; + ret = ib_destroy_ah(ah); + if (!ret) + uobj->live = 0; - uobj = ah->uobject; + put_uobj_write(uobj); - ret = ib_destroy_ah(ah); if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle); + idr_remove_uobj(&ib_uverbs_ah_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); - kfree(uobj); + put_uobj(uobj); -out: - mutex_unlock(&ib_uverbs_idr_mutex); - - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, @@ -1670,47 +1858,43 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, { struct ib_uverbs_attach_mcast cmd; struct ib_qp *qp; - struct ib_uqp_object *uobj; + struct ib_uqp_object *obj; struct ib_uverbs_mcast_entry *mcast; - int ret = -EINVAL; + int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) - goto out; + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) + return -EINVAL; - uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); - list_for_each_entry(mcast, &uobj->mcast_list, list) + list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { ret = 0; - goto out; + goto out_put; } mcast = kmalloc(sizeof *mcast, GFP_KERNEL); if (!mcast) { ret = -ENOMEM; - goto out; + goto out_put; } mcast->lid = cmd.mlid; memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw); ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid); - if (!ret) { - uobj = container_of(qp->uobject, struct ib_uqp_object, - uevent.uobject); - list_add_tail(&mcast->list, &uobj->mcast_list); - } else + if (!ret) + list_add_tail(&mcast->list, &obj->mcast_list); + else kfree(mcast); -out: - mutex_unlock(&ib_uverbs_idr_mutex); +out_put: + put_qp_read(qp); return ret ? ret : in_len; } @@ -1720,7 +1904,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_detach_mcast cmd; - struct ib_uqp_object *uobj; + struct ib_uqp_object *obj; struct ib_qp *qp; struct ib_uverbs_mcast_entry *mcast; int ret = -EINVAL; @@ -1728,19 +1912,17 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) - goto out; + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) + return -EINVAL; ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); if (ret) - goto out; + goto out_put; - uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); - list_for_each_entry(mcast, &uobj->mcast_list, list) + list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { list_del(&mcast->list); @@ -1748,8 +1930,8 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, break; } -out: - mutex_unlock(&ib_uverbs_idr_mutex); +out_put: + put_qp_read(qp); return ret ? ret : in_len; } @@ -1761,7 +1943,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_srq_resp resp; struct ib_udata udata; - struct ib_uevent_object *uobj; + struct ib_uevent_object *obj; struct ib_pd *pd; struct ib_srq *srq; struct ib_srq_init_attr attr; @@ -1777,17 +1959,17 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); - - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + init_uobj(&obj->uobject, 0, file->ucontext); + down_write(&obj->uobject.mutex); - if (!pd || pd->uobject->context != file->ucontext) { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { ret = -EINVAL; - goto err_up; + goto err; } attr.event_handler = ib_uverbs_srq_event_handler; @@ -1796,59 +1978,59 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, attr.attr.max_sge = cmd.max_sge; attr.attr.srq_limit = cmd.srq_limit; - uobj->uobject.user_handle = cmd.user_handle; - uobj->uobject.context = file->ucontext; - uobj->events_reported = 0; - INIT_LIST_HEAD(&uobj->event_list); + obj->events_reported = 0; + INIT_LIST_HEAD(&obj->event_list); srq = pd->device->create_srq(pd, &attr, &udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); - goto err_up; + goto err; } srq->device = pd->device; srq->pd = pd; - srq->uobject = &uobj->uobject; + srq->uobject = &obj->uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); - memset(&resp, 0, sizeof resp); - - ret = idr_add_uobj(&ib_uverbs_srq_idr, srq, &uobj->uobject); + obj->uobject.object = srq; + ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); if (ret) goto err_destroy; - resp.srq_handle = uobj->uobject.id; + memset(&resp, 0, sizeof resp); + resp.srq_handle = obj->uobject.id; resp.max_wr = attr.attr.max_wr; resp.max_sge = attr.attr.max_sge; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } + put_pd_read(pd); + mutex_lock(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); + list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + obj->uobject.live = 1; + + up_write(&obj->uobject.mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id); +err_copy: + idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject); err_destroy: ib_destroy_srq(srq); -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); - - kfree(uobj); +err: + put_uobj_write(&obj->uobject); return ret; } @@ -1864,21 +2046,16 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); - if (!srq || srq->uobject->context != file->ucontext) { - ret = -EINVAL; - goto out; - } + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) + return -EINVAL; attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; ret = ib_modify_srq(srq, &attr, cmd.attr_mask); -out: - mutex_unlock(&ib_uverbs_idr_mutex); + put_srq_read(srq); return ret ? ret : in_len; } @@ -1899,18 +2076,16 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) + return -EINVAL; - srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); - if (srq && srq->uobject->context == file->ucontext) - ret = ib_query_srq(srq, &attr); - else - ret = -EINVAL; + ret = ib_query_srq(srq, &attr); - mutex_unlock(&ib_uverbs_idr_mutex); + put_srq_read(srq); if (ret) - goto out; + return ret; memset(&resp, 0, sizeof resp); @@ -1920,10 +2095,9 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - ret = -EFAULT; + return -EFAULT; -out: - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, @@ -1932,45 +2106,45 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, { struct ib_uverbs_destroy_srq cmd; struct ib_uverbs_destroy_srq_resp resp; + struct ib_uobject *uobj; struct ib_srq *srq; - struct ib_uevent_object *uobj; + struct ib_uevent_object *obj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - memset(&resp, 0, sizeof resp); + uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); + if (!uobj) + return -EINVAL; + srq = uobj->object; + obj = container_of(uobj, struct ib_uevent_object, uobject); - srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); - if (!srq || srq->uobject->context != file->ucontext) - goto out; + ret = ib_destroy_srq(srq); + if (!ret) + uobj->live = 0; - uobj = container_of(srq->uobject, struct ib_uevent_object, uobject); + put_uobj_write(uobj); - ret = ib_destroy_srq(srq); if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); mutex_lock(&file->mutex); - list_del(&uobj->uobject.list); + list_del(&uobj->list); mutex_unlock(&file->mutex); - ib_uverbs_release_uevent(file, uobj); + ib_uverbs_release_uevent(file, obj); - resp.events_reported = uobj->events_reported; + memset(&resp, 0, sizeof resp); + resp.events_reported = obj->events_reported; - kfree(uobj); + put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; -out: - mutex_unlock(&ib_uverbs_idr_mutex); - return ret ? ret : in_len; } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index ff092a0a94da..5ec2d49e9bb6 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -66,7 +66,7 @@ enum { static struct class *uverbs_class; -DEFINE_MUTEX(ib_uverbs_idr_mutex); +DEFINE_SPINLOCK(ib_uverbs_idr_lock); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); DEFINE_IDR(ib_uverbs_mw_idr); @@ -183,21 +183,21 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, if (!context) return 0; - mutex_lock(&ib_uverbs_idr_mutex); - list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { - struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id); - idr_remove(&ib_uverbs_ah_idr, uobj->id); + struct ib_ah *ah = uobj->object; + + idr_remove_uobj(&ib_uverbs_ah_idr, uobj); ib_destroy_ah(ah); list_del(&uobj->list); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { - struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); + struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = container_of(uobj, struct ib_uqp_object, uevent.uobject); - idr_remove(&ib_uverbs_qp_idr, uobj->id); + + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); ib_uverbs_detach_umcast(qp, uqp); ib_destroy_qp(qp); list_del(&uobj->list); @@ -206,11 +206,12 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { - struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); + struct ib_cq *cq = uobj->object; struct ib_uverbs_event_file *ev_file = cq->cq_context; struct ib_ucq_object *ucq = container_of(uobj, struct ib_ucq_object, uobject); - idr_remove(&ib_uverbs_cq_idr, uobj->id); + + idr_remove_uobj(&ib_uverbs_cq_idr, uobj); ib_destroy_cq(cq); list_del(&uobj->list); ib_uverbs_release_ucq(file, ev_file, ucq); @@ -218,10 +219,11 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { - struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); + struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = container_of(uobj, struct ib_uevent_object, uobject); - idr_remove(&ib_uverbs_srq_idr, uobj->id); + + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); ib_destroy_srq(srq); list_del(&uobj->list); ib_uverbs_release_uevent(file, uevent); @@ -231,11 +233,11 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, /* XXX Free MWs */ list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { - struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id); + struct ib_mr *mr = uobj->object; struct ib_device *mrdev = mr->device; struct ib_umem_object *memobj; - idr_remove(&ib_uverbs_mr_idr, uobj->id); + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); ib_dereg_mr(mr); memobj = container_of(uobj, struct ib_umem_object, uobject); @@ -246,15 +248,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, } list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { - struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id); - idr_remove(&ib_uverbs_pd_idr, uobj->id); + struct ib_pd *pd = uobj->object; + + idr_remove_uobj(&ib_uverbs_pd_idr, uobj); ib_dealloc_pd(pd); list_del(&uobj->list); kfree(uobj); } - mutex_unlock(&ib_uverbs_idr_mutex); - return context->device->dealloc_ucontext(context); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7ced208edacf..ee1f3a355666 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -697,8 +697,12 @@ struct ib_ucontext { struct ib_uobject { u64 user_handle; /* handle given to us by userspace */ struct ib_ucontext *context; /* associated user context */ + void *object; /* containing object */ struct list_head list; /* link to context's list */ u32 id; /* index into kernel idr */ + struct kref ref; + struct rw_semaphore mutex; /* protects .live */ + int live; }; struct ib_umem { -- cgit v1.2.3