summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorTatyana Nikolova <Tatyana.E.Nikolova@intel.com>2014-03-26 17:07:35 -0500
committerRoland Dreier <roland@purestorage.com>2014-06-10 10:11:45 -0700
commit30dc5e63d6a5ad24894b5512d10b228d73645a44 (patch)
treecbb54ab8d06b165dbbd83a243cf743c8b6ad64da /include
parentd6d211db37e75de2ddc3a4f979038c40df7cc79c (diff)
downloadlwn-30dc5e63d6a5ad24894b5512d10b228d73645a44.tar.gz
lwn-30dc5e63d6a5ad24894b5512d10b228d73645a44.zip
RDMA/core: Add support for iWARP Port Mapper user space service
This patch adds iWARP Port Mapper (IWPM) Version 2 support. The iWARP Port Mapper implementation is based on the port mapper specification section in the Sockets Direct Protocol paper - http://www.rdmaconsortium.org/home/draft-pinkerton-iwarp-sdp-v1.0.pdf Existing iWARP RDMA providers use the same IP address as the native TCP/IP stack when creating RDMA connections. They need a mechanism to claim the TCP ports used for RDMA connections to prevent TCP port collisions when other host applications use TCP ports. The iWARP Port Mapper provides a standard mechanism to accomplish this. Without this service it is possible for RDMA application to bind/listen on the same port which is already being used by native TCP host application. If that happens the incoming TCP connection data can be passed to the RDMA stack with error. The iWARP Port Mapper solution doesn't contain any changes to the existing network stack in the kernel space. All the changes are contained with the infiniband tree and also in user space. The iWARP Port Mapper service is implemented as a user space daemon process. Source for the IWPM service is located at http://git.openfabrics.org/git?p=~tnikolova/libiwpm-1.0.0/.git;a=summary The iWARP driver (port mapper client) sends to the IWPM service the local IP address and TCP port it has received from the RDMA application, when starting a connection. The IWPM service performs a socket bind from user space to get an available TCP port, called a mapped port, and communicates it back to the client. In that sense, the IWPM service is used to map the TCP port, which the RDMA application uses to any port available from the host TCP port space. The mapped ports are used in iWARP RDMA connections to avoid collisions with native TCP stack which is aware that these ports are taken. When an RDMA connection using a mapped port is terminated, the client notifies the IWPM service, which then releases the TCP port. The message exchange between the IWPM service and the iWARP drivers (between user space and kernel space) is implemented using netlink sockets. 1) Netlink interface functions are added: ibnl_unicast() and ibnl_mulitcast() for sending netlink messages to user space 2) The signature of the existing ibnl_put_msg() is changed to be more generic 3) Two netlink clients are added: RDMA_NL_NES, RDMA_NL_C4IW corresponding to the two iWarp drivers - nes and cxgb4 which use the IWPM service 4) Enums are added to enumerate the attributes in the netlink messages, which are exchanged between the user space IWPM service and the iWARP drivers Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com> Signed-off-by: Steve Wise <swise@opengridcomputing.com> Reviewed-by: PJ Waskiewicz <pj.waskiewicz@solidfire.com> [ Fold in range checking fixes and nlh_next removal as suggested by Dan Carpenter and Steve Wise. Fix sparse endianness in hash. - Roland ] Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'include')
-rw-r--r--include/rdma/iw_portmap.h199
-rw-r--r--include/rdma/rdma_netlink.h23
-rw-r--r--include/uapi/rdma/rdma_netlink.h96
3 files changed, 316 insertions, 2 deletions
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
new file mode 100644
index 000000000000..928b2775e992
--- /dev/null
+++ b/include/rdma/iw_portmap.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IW_PORTMAP_H
+#define _IW_PORTMAP_H
+
+#define IWPM_ULIBNAME_SIZE 32
+#define IWPM_DEVNAME_SIZE 32
+#define IWPM_IFNAME_SIZE 16
+#define IWPM_IPADDR_SIZE 16
+
+enum {
+ IWPM_INVALID_NLMSG_ERR = 10,
+ IWPM_CREATE_MAPPING_ERR,
+ IWPM_DUPLICATE_MAPPING_ERR,
+ IWPM_UNKNOWN_MAPPING_ERR,
+ IWPM_CLIENT_DEV_INFO_ERR,
+ IWPM_USER_LIB_INFO_ERR,
+ IWPM_REMOTE_QUERY_REJECT
+};
+
+struct iwpm_dev_data {
+ char dev_name[IWPM_DEVNAME_SIZE];
+ char if_name[IWPM_IFNAME_SIZE];
+};
+
+struct iwpm_sa_data {
+ struct sockaddr_storage loc_addr;
+ struct sockaddr_storage mapped_loc_addr;
+ struct sockaddr_storage rem_addr;
+ struct sockaddr_storage mapped_rem_addr;
+};
+
+/**
+ * iwpm_init - Allocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes up.
+ */
+int iwpm_init(u8);
+
+/**
+ * iwpm_exit - Deallocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes down.
+ */
+int iwpm_exit(u8);
+
+/**
+ * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
+ *
+ * Returns true if the pid is greater than zero, otherwise returns false
+ */
+int iwpm_valid_pid(void);
+
+/**
+ * iwpm_register_pid - Send a netlink query to userspace
+ * to get the iwarp port mapper pid
+ * @pm_msg: Contains driver info to send to the userspace port mapper
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_mapping - Send a netlink add mapping request to
+ * the userspace port mapper
+ * @pm_msg: Contains the local ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores
+ * the port mapper response (mapped address info)
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_and_query_mapping - Send a netlink add and query mapping request
+ * to the userspace port mapper
+ * @pm_msg: Contains the local and remote ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores the
+ * port mapper response (mapped local and remote address info)
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_remove_mapping - Send a netlink remove mapping request
+ * to the userspace port mapper
+ *
+ * @local_addr: Local ip/tcp address to remove
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client);
+
+/**
+ * iwpm_register_pid_cb - Process the port mapper response to
+ * iwpm_register_pid query
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * If successful, the function receives the userspace port mapper pid
+ * which is used in future communication with the port mapper
+ */
+int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_mapping_cb - Process the port mapper response to
+ * iwpm_add_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_and_query_mapping_cb - Process the port mapper response to
+ * iwpm_add_and_query_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_error_cb - Process port mapper notification for error
+ *
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_info_cb - Process a notification that the userspace
+ * port mapper daemon is started
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send all the local mapping
+ * info records to the userspace port mapper
+ */
+int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_ack_mapping_info_cb - Process the port mapper ack for
+ * the provided local mapping info records
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
+ * info in a hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
+ struct sockaddr_storage *mapped_addr, u8 nl_client);
+
+/**
+ * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
+ * info from the hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ *
+ * Returns err code if mapping info is not found in the hash table,
+ * otherwise returns 0
+ */
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
+ struct sockaddr_storage *mapped_addr);
+
+#endif /* _IW_PORTMAP_H */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index e38de79eeb48..0790882e0c9b 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -43,7 +43,7 @@ int ibnl_remove_client(int index);
* Returns the allocated buffer on success and NULL on failure.
*/
void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
- int len, int client, int op);
+ int len, int client, int op, int flags);
/**
* Put a new attribute in a supplied skb.
* @skb: The netlink skb.
@@ -56,4 +56,25 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
int len, void *data, int type);
+/**
+ * Send the supplied skb to a specific userspace PID.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @pid: Userspace netlink process ID
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+ __u32 pid);
+
+/**
+ * Send the supplied skb to a netlink group.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @group: Netlink group ID
+ * @flags: allocation flags
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+ unsigned int group, gfp_t flags);
+
#endif /* _RDMA_NETLINK_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 8297285b6288..de69170a30ce 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -4,7 +4,16 @@
#include <linux/types.h>
enum {
- RDMA_NL_RDMA_CM = 1
+ RDMA_NL_RDMA_CM = 1,
+ RDMA_NL_NES,
+ RDMA_NL_C4IW,
+ RDMA_NL_NUM_CLIENTS
+};
+
+enum {
+ RDMA_NL_GROUP_CM = 1,
+ RDMA_NL_GROUP_IWPM,
+ RDMA_NL_NUM_GROUPS
};
#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
@@ -22,6 +31,18 @@ enum {
RDMA_NL_RDMA_CM_NUM_ATTR,
};
+/* iwarp port mapper op-codes */
+enum {
+ RDMA_NL_IWPM_REG_PID = 0,
+ RDMA_NL_IWPM_ADD_MAPPING,
+ RDMA_NL_IWPM_QUERY_MAPPING,
+ RDMA_NL_IWPM_REMOVE_MAPPING,
+ RDMA_NL_IWPM_HANDLE_ERR,
+ RDMA_NL_IWPM_MAPINFO,
+ RDMA_NL_IWPM_MAPINFO_NUM,
+ RDMA_NL_IWPM_NUM_OPS
+};
+
struct rdma_cm_id_stats {
__u32 qp_num;
__u32 bound_dev_if;
@@ -33,5 +54,78 @@ struct rdma_cm_id_stats {
__u8 qp_type;
};
+enum {
+ IWPM_NLA_REG_PID_UNSPEC = 0,
+ IWPM_NLA_REG_PID_SEQ,
+ IWPM_NLA_REG_IF_NAME,
+ IWPM_NLA_REG_IBDEV_NAME,
+ IWPM_NLA_REG_ULIB_NAME,
+ IWPM_NLA_REG_PID_MAX
+};
+
+enum {
+ IWPM_NLA_RREG_PID_UNSPEC = 0,
+ IWPM_NLA_RREG_PID_SEQ,
+ IWPM_NLA_RREG_IBDEV_NAME,
+ IWPM_NLA_RREG_ULIB_NAME,
+ IWPM_NLA_RREG_ULIB_VER,
+ IWPM_NLA_RREG_PID_ERR,
+ IWPM_NLA_RREG_PID_MAX
+
+};
+
+enum {
+ IWPM_NLA_MANAGE_MAPPING_UNSPEC = 0,
+ IWPM_NLA_MANAGE_MAPPING_SEQ,
+ IWPM_NLA_MANAGE_ADDR,
+ IWPM_NLA_MANAGE_MAPPED_LOC_ADDR,
+ IWPM_NLA_RMANAGE_MAPPING_ERR,
+ IWPM_NLA_RMANAGE_MAPPING_MAX
+};
+
+#define IWPM_NLA_MANAGE_MAPPING_MAX 3
+#define IWPM_NLA_QUERY_MAPPING_MAX 4
+#define IWPM_NLA_MAPINFO_SEND_MAX 3
+
+enum {
+ IWPM_NLA_QUERY_MAPPING_UNSPEC = 0,
+ IWPM_NLA_QUERY_MAPPING_SEQ,
+ IWPM_NLA_QUERY_LOCAL_ADDR,
+ IWPM_NLA_QUERY_REMOTE_ADDR,
+ IWPM_NLA_RQUERY_MAPPED_LOC_ADDR,
+ IWPM_NLA_RQUERY_MAPPED_REM_ADDR,
+ IWPM_NLA_RQUERY_MAPPING_ERR,
+ IWPM_NLA_RQUERY_MAPPING_MAX
+};
+
+enum {
+ IWPM_NLA_MAPINFO_REQ_UNSPEC = 0,
+ IWPM_NLA_MAPINFO_ULIB_NAME,
+ IWPM_NLA_MAPINFO_ULIB_VER,
+ IWPM_NLA_MAPINFO_REQ_MAX
+};
+
+enum {
+ IWPM_NLA_MAPINFO_UNSPEC = 0,
+ IWPM_NLA_MAPINFO_LOCAL_ADDR,
+ IWPM_NLA_MAPINFO_MAPPED_ADDR,
+ IWPM_NLA_MAPINFO_MAX
+};
+
+enum {
+ IWPM_NLA_MAPINFO_NUM_UNSPEC = 0,
+ IWPM_NLA_MAPINFO_SEQ,
+ IWPM_NLA_MAPINFO_SEND_NUM,
+ IWPM_NLA_MAPINFO_ACK_NUM,
+ IWPM_NLA_MAPINFO_NUM_MAX
+};
+
+enum {
+ IWPM_NLA_ERR_UNSPEC = 0,
+ IWPM_NLA_ERR_SEQ,
+ IWPM_NLA_ERR_CODE,
+ IWPM_NLA_ERR_MAX
+};
+
#endif /* _UAPI_RDMA_NETLINK_H */