summaryrefslogtreecommitdiff
path: root/drivers/infiniband/ulp
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-19 12:29:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-19 12:29:23 -0700
commit2eb7f910c158fd675ab33aff67904512779996e8 (patch)
treecc5b453afad998da001ee3611f358b7bdc218981 /drivers/infiniband/ulp
parent1f6075f99073a8b5ec9649ae8c0bf2e06fdd42f1 (diff)
parent7b909bb49ac204bfd2e628707db37beb490dbc5c (diff)
downloadlwn-2eb7f910c158fd675ab33aff67904512779996e8.tar.gz
lwn-2eb7f910c158fd675ab33aff67904512779996e8.zip
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull infiniband/RDMA updates from Roland Dreier: - large set of iSER initiator improvements - hardware driver fixes for cxgb4, mlx5 and ocrdma - small fixes to core midlayer * tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (47 commits) RDMA/cxgb4: Fix ntuple calculation for ipv6 and remove duplicate line RDMA/cxgb4: Add missing neigh_release in find_route RDMA/cxgb4: Take IPv6 into account for best_mtu and set_emss RDMA/cxgb4: Make c4iw_wr_log_size_order static IB/core: Fix XRC race condition in ib_uverbs_open_qp IB/core: Clear AH attr variable to prevent garbage data RDMA/ocrdma: Save the bit environment, spare unncessary parenthesis RDMA/ocrdma: The kernel has a perfectly good BIT() macro - use it RDMA/ocrdma: Don't memset() buffers we just allocated with kzalloc() RDMA/ocrdma: Remove a unused-label warning RDMA/ocrdma: Convert kernel VA to PA for mmap in user RDMA/ocrdma: Get vlan tag from ib_qp_attrs RDMA/ocrdma: Add default GID at index 0 IB/mlx5, iser, isert: Add Signature API additions Target/iser: Centralize ib_sig_domain setting IB/iser: Centralize ib_sig_domain settings IB/mlx5: Use extended internal signature layout IB/iser: Set IP_CSUM as default guard type IB/iser: Remove redundant assignment IB/mlx5: Use enumerations for PI copy mask ...
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c313
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h408
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c198
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c99
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c667
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c65
6 files changed, 1081 insertions, 669 deletions
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 93ce62fe1594..f42ab14105ac 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -83,7 +83,7 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
int iser_debug_level = 0;
bool iser_pi_enable = false;
-int iser_pi_guard = 0;
+int iser_pi_guard = 1;
MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
MODULE_LICENSE("Dual BSD/GPL");
@@ -97,14 +97,24 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644);
MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
module_param_named(pi_guard, iser_pi_guard, int, 0644);
-MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)");
static struct workqueue_struct *release_wq;
struct iser_global ig;
+/*
+ * iscsi_iser_recv() - Process a successfull recv completion
+ * @conn: iscsi connection
+ * @hdr: iscsi header
+ * @rx_data: buffer containing receive data payload
+ * @rx_data_len: length of rx_data
+ *
+ * Notes: In case of data length errors or iscsi PDU completion failures
+ * this routine will signal iscsi layer of connection failure.
+ */
void
-iscsi_iser_recv(struct iscsi_conn *conn,
- struct iscsi_hdr *hdr, char *rx_data, int rx_data_len)
+iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+ char *rx_data, int rx_data_len)
{
int rc = 0;
int datalen;
@@ -135,20 +145,30 @@ error:
iscsi_conn_failure(conn, rc);
}
-static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
+/**
+ * iscsi_iser_pdu_alloc() - allocate an iscsi-iser PDU
+ * @task: iscsi task
+ * @opcode: iscsi command opcode
+ *
+ * Netes: This routine can't fail, just assign iscsi task
+ * hdr and max hdr size.
+ */
+static int
+iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
{
struct iscsi_iser_task *iser_task = task->dd_data;
task->hdr = (struct iscsi_hdr *)&iser_task->desc.iscsi_header;
task->hdr_max = sizeof(iser_task->desc.iscsi_header);
+
return 0;
}
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc)
{
- struct iser_conn *ib_conn = task->conn->dd_data;
- struct iser_device *device = ib_conn->device;
+ struct iser_conn *iser_conn = task->conn->dd_data;
+ struct iser_device *device = iser_conn->ib_conn.device;
struct iscsi_iser_task *iser_task = task->dd_data;
u64 dma_addr;
@@ -162,14 +182,18 @@ int iser_initialize_task_headers(struct iscsi_task *task,
tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
tx_desc->tx_sg[0].lkey = device->mr->lkey;
- iser_task->ib_conn = ib_conn;
+ iser_task->iser_conn = iser_conn;
return 0;
}
+
/**
- * iscsi_iser_task_init - Initialize task
+ * iscsi_iser_task_init() - Initialize iscsi-iser task
* @task: iscsi task
*
* Initialize the task for the scsi command or mgmt command.
+ *
+ * Return: Returns zero on success or -ENOMEM when failing
+ * to init task headers (dma mapping error).
*/
static int
iscsi_iser_task_init(struct iscsi_task *task)
@@ -191,7 +215,7 @@ iscsi_iser_task_init(struct iscsi_task *task)
}
/**
- * iscsi_iser_mtask_xmit - xmit management(immediate) task
+ * iscsi_iser_mtask_xmit() - xmit management (immediate) task
* @conn: iscsi connection
* @task: task management task
*
@@ -249,6 +273,12 @@ iscsi_iser_task_xmit_unsol_data_exit:
return error;
}
+/**
+ * iscsi_iser_task_xmit() - xmit iscsi-iser task
+ * @task: iscsi task
+ *
+ * Return: zero on success or escalates $error on failure.
+ */
static int
iscsi_iser_task_xmit(struct iscsi_task *task)
{
@@ -286,12 +316,24 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
return error;
}
+/**
+ * iscsi_iser_cleanup_task() - cleanup an iscsi-iser task
+ * @task: iscsi task
+ *
+ * Notes: In case the RDMA device is already NULL (might have
+ * been removed in DEVICE_REMOVAL CM event it will bail-out
+ * without doing dma unmapping.
+ */
static void iscsi_iser_cleanup_task(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *tx_desc = &iser_task->desc;
- struct iser_conn *ib_conn = task->conn->dd_data;
- struct iser_device *device = ib_conn->device;
+ struct iser_conn *iser_conn = task->conn->dd_data;
+ struct iser_device *device = iser_conn->ib_conn.device;
+
+ /* DEVICE_REMOVAL event might have already released the device */
+ if (!device)
+ return;
ib_dma_unmap_single(device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -306,7 +348,20 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
}
}
-static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
+/**
+ * iscsi_iser_check_protection() - check protection information status of task.
+ * @task: iscsi task
+ * @sector: error sector if exsists (output)
+ *
+ * Return: zero if no data-integrity errors have occured
+ * 0x1: data-integrity error occured in the guard-block
+ * 0x2: data-integrity error occured in the reference tag
+ * 0x3: data-integrity error occured in the application tag
+ *
+ * In addition the error sector is marked.
+ */
+static u8
+iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
{
struct iscsi_iser_task *iser_task = task->dd_data;
@@ -318,8 +373,17 @@ static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
sector);
}
+/**
+ * iscsi_iser_conn_create() - create a new iscsi-iser connection
+ * @cls_session: iscsi class connection
+ * @conn_idx: connection index within the session (for MCS)
+ *
+ * Return: iscsi_cls_conn when iscsi_conn_setup succeeds or NULL
+ * otherwise.
+ */
static struct iscsi_cls_conn *
-iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
+iscsi_iser_conn_create(struct iscsi_cls_session *cls_session,
+ uint32_t conn_idx)
{
struct iscsi_conn *conn;
struct iscsi_cls_conn *cls_conn;
@@ -338,13 +402,25 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
return cls_conn;
}
+/**
+ * iscsi_iser_conn_bind() - bind iscsi and iser connection structures
+ * @cls_session: iscsi class session
+ * @cls_conn: iscsi class connection
+ * @transport_eph: transport end-point handle
+ * @is_leading: indicate if this is the session leading connection (MCS)
+ *
+ * Return: zero on success, $error if iscsi_conn_bind fails and
+ * -EINVAL in case end-point doesn't exsits anymore or iser connection
+ * state is not UP (teardown already started).
+ */
static int
iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
- struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
+ struct iscsi_cls_conn *cls_conn,
+ uint64_t transport_eph,
int is_leading)
{
struct iscsi_conn *conn = cls_conn->dd_data;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
struct iscsi_endpoint *ep;
int error;
@@ -360,66 +436,100 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
(unsigned long long)transport_eph);
return -EINVAL;
}
- ib_conn = ep->dd_data;
+ iser_conn = ep->dd_data;
- mutex_lock(&ib_conn->state_mutex);
- if (ib_conn->state != ISER_CONN_UP) {
+ mutex_lock(&iser_conn->state_mutex);
+ if (iser_conn->state != ISER_CONN_UP) {
error = -EINVAL;
iser_err("iser_conn %p state is %d, teardown started\n",
- ib_conn, ib_conn->state);
+ iser_conn, iser_conn->state);
goto out;
}
- error = iser_alloc_rx_descriptors(ib_conn, conn->session);
+ error = iser_alloc_rx_descriptors(iser_conn, conn->session);
if (error)
goto out;
/* binds the iSER connection retrieved from the previously
* connected ep_handle to the iSCSI layer connection. exchanges
* connection pointers */
- iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn);
+ iser_info("binding iscsi conn %p to iser_conn %p\n", conn, iser_conn);
- conn->dd_data = ib_conn;
- ib_conn->iscsi_conn = conn;
+ conn->dd_data = iser_conn;
+ iser_conn->iscsi_conn = conn;
out:
- mutex_unlock(&ib_conn->state_mutex);
+ mutex_unlock(&iser_conn->state_mutex);
return error;
}
+/**
+ * iscsi_iser_conn_start() - start iscsi-iser connection
+ * @cls_conn: iscsi class connection
+ *
+ * Notes: Here iser intialize (or re-initialize) stop_completion as
+ * from this point iscsi must call conn_stop in session/connection
+ * teardown so iser transport must wait for it.
+ */
static int
iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
{
struct iscsi_conn *iscsi_conn;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
iscsi_conn = cls_conn->dd_data;
- ib_conn = iscsi_conn->dd_data;
- reinit_completion(&ib_conn->stop_completion);
+ iser_conn = iscsi_conn->dd_data;
+ reinit_completion(&iser_conn->stop_completion);
return iscsi_conn_start(cls_conn);
}
+/**
+ * iscsi_iser_conn_stop() - stop iscsi-iser connection
+ * @cls_conn: iscsi class connection
+ * @flag: indicate if recover or terminate (passed as is)
+ *
+ * Notes: Calling iscsi_conn_stop might theoretically race with
+ * DEVICE_REMOVAL event and dereference a previously freed RDMA device
+ * handle, so we call it under iser the state lock to protect against
+ * this kind of race.
+ */
static void
iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
struct iscsi_conn *conn = cls_conn->dd_data;
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
- iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn);
- iscsi_conn_stop(cls_conn, flag);
+ iser_info("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn);
/*
* Userspace may have goofed up and not bound the connection or
* might have only partially setup the connection.
*/
- if (ib_conn) {
+ if (iser_conn) {
+ mutex_lock(&iser_conn->state_mutex);
+ iscsi_conn_stop(cls_conn, flag);
+ iser_conn_terminate(iser_conn);
+
+ /* unbind */
+ iser_conn->iscsi_conn = NULL;
conn->dd_data = NULL;
- complete(&ib_conn->stop_completion);
+
+ complete(&iser_conn->stop_completion);
+ mutex_unlock(&iser_conn->state_mutex);
+ } else {
+ iscsi_conn_stop(cls_conn, flag);
}
}
-static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
+/**
+ * iscsi_iser_session_destroy() - destroy iscsi-iser session
+ * @cls_session: iscsi class session
+ *
+ * Removes and free iscsi host.
+ */
+static void
+iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
{
struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
@@ -439,6 +549,16 @@ iser_dif_prot_caps(int prot_caps)
SHOST_DIX_TYPE3_PROTECTION : 0);
}
+/**
+ * iscsi_iser_session_create() - create an iscsi-iser session
+ * @ep: iscsi end-point handle
+ * @cmds_max: maximum commands in this session
+ * @qdepth: session command queue depth
+ * @initial_cmdsn: initiator command sequnce number
+ *
+ * Allocates and adds a scsi host, expose DIF supprot if
+ * exists, and sets up an iscsi session.
+ */
static struct iscsi_cls_session *
iscsi_iser_session_create(struct iscsi_endpoint *ep,
uint16_t cmds_max, uint16_t qdepth,
@@ -447,7 +567,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
struct iscsi_cls_session *cls_session;
struct iscsi_session *session;
struct Scsi_Host *shost;
- struct iser_conn *ib_conn = NULL;
+ struct iser_conn *iser_conn = NULL;
+ struct ib_conn *ib_conn;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
@@ -464,7 +585,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
* the leading conn's ep so this will be NULL;
*/
if (ep) {
- ib_conn = ep->dd_data;
+ iser_conn = ep->dd_data;
+ ib_conn = &iser_conn->ib_conn;
if (ib_conn->pi_support) {
u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
@@ -476,8 +598,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
}
}
- if (iscsi_host_add(shost,
- ep ? ib_conn->device->ib_device->dma_device : NULL))
+ if (iscsi_host_add(shost, ep ?
+ ib_conn->device->ib_device->dma_device : NULL))
goto free_host;
if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
@@ -549,6 +671,13 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
return 0;
}
+/**
+ * iscsi_iser_set_param() - set class connection parameter
+ * @cls_conn: iscsi class connection
+ * @stats: iscsi stats to output
+ *
+ * Output connection statistics.
+ */
static void
iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
{
@@ -577,18 +706,18 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
enum iscsi_param param, char *buf)
{
- struct iser_conn *ib_conn = ep->dd_data;
+ struct iser_conn *iser_conn = ep->dd_data;
int len;
switch (param) {
case ISCSI_PARAM_CONN_PORT:
case ISCSI_PARAM_CONN_ADDRESS:
- if (!ib_conn || !ib_conn->cma_id)
+ if (!iser_conn || !iser_conn->ib_conn.cma_id)
return -ENOTCONN;
return iscsi_conn_get_addr_param((struct sockaddr_storage *)
- &ib_conn->cma_id->route.addr.dst_addr,
- param, buf);
+ &iser_conn->ib_conn.cma_id->route.addr.dst_addr,
+ param, buf);
break;
default:
return -ENOSYS;
@@ -597,29 +726,44 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
return len;
}
+/**
+ * iscsi_iser_ep_connect() - Initiate iSER connection establishment
+ * @shost: scsi_host
+ * @dst_addr: destination address
+ * @non-blocking: indicate if routine can block
+ *
+ * Allocate an iscsi endpoint, an iser_conn structure and bind them.
+ * After that start RDMA connection establishment via rdma_cm. We
+ * don't allocate iser_conn embedded in iscsi_endpoint since in teardown
+ * the endpoint will be destroyed at ep_disconnect while iser_conn will
+ * cleanup its resources asynchronuously.
+ *
+ * Return: iscsi_endpoint created by iscsi layer or ERR_PTR(error)
+ * if fails.
+ */
static struct iscsi_endpoint *
iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
int non_blocking)
{
int err;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
struct iscsi_endpoint *ep;
ep = iscsi_create_endpoint(0);
if (!ep)
return ERR_PTR(-ENOMEM);
- ib_conn = kzalloc(sizeof(*ib_conn), GFP_KERNEL);
- if (!ib_conn) {
+ iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL);
+ if (!iser_conn) {
err = -ENOMEM;
goto failure;
}
- ep->dd_data = ib_conn;
- ib_conn->ep = ep;
- iser_conn_init(ib_conn);
+ ep->dd_data = iser_conn;
+ iser_conn->ep = ep;
+ iser_conn_init(iser_conn);
- err = iser_connect(ib_conn, NULL, dst_addr, non_blocking);
+ err = iser_connect(iser_conn, NULL, dst_addr, non_blocking);
if (err)
goto failure;
@@ -629,25 +773,38 @@ failure:
return ERR_PTR(err);
}
+/**
+ * iscsi_iser_ep_poll() - poll for iser connection establishment to complete
+ * @ep: iscsi endpoint (created at ep_connect)
+ * @timeout_ms: polling timeout allowed in ms.
+ *
+ * This routine boils down to waiting for up_completion signaling
+ * that cma_id got CONNECTED event.
+ *
+ * Return: 1 if succeeded in connection establishment, 0 if timeout expired
+ * (libiscsi will retry will kick in) or -1 if interrupted by signal
+ * or more likely iser connection state transitioned to TEMINATING or
+ * DOWN during the wait period.
+ */
static int
iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
int rc;
- ib_conn = ep->dd_data;
- rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion,
+ iser_conn = ep->dd_data;
+ rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion,
msecs_to_jiffies(timeout_ms));
/* if conn establishment failed, return error code to iscsi */
if (rc == 0) {
- mutex_lock(&ib_conn->state_mutex);
- if (ib_conn->state == ISER_CONN_TERMINATING ||
- ib_conn->state == ISER_CONN_DOWN)
+ mutex_lock(&iser_conn->state_mutex);
+ if (iser_conn->state == ISER_CONN_TERMINATING ||
+ iser_conn->state == ISER_CONN_DOWN)
rc = -1;
- mutex_unlock(&ib_conn->state_mutex);
+ mutex_unlock(&iser_conn->state_mutex);
}
- iser_info("ib conn %p rc = %d\n", ib_conn, rc);
+ iser_info("ib conn %p rc = %d\n", iser_conn, rc);
if (rc > 0)
return 1; /* success, this is the equivalent of POLLOUT */
@@ -657,15 +814,26 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
return rc; /* signal */
}
+/**
+ * iscsi_iser_ep_disconnect() - Initiate connection teardown process
+ * @ep: iscsi endpoint handle
+ *
+ * This routine is not blocked by iser and RDMA termination process
+ * completion as we queue a deffered work for iser/RDMA destruction
+ * and cleanup or actually call it immediately in case we didn't pass
+ * iscsi conn bind/start stage, thus it is safe.
+ */
static void
iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
+
+ iser_conn = ep->dd_data;
+ iser_info("ep %p iser conn %p state %d\n",
+ ep, iser_conn, iser_conn->state);
- ib_conn = ep->dd_data;
- iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
- mutex_lock(&ib_conn->state_mutex);
- iser_conn_terminate(ib_conn);
+ mutex_lock(&iser_conn->state_mutex);
+ iser_conn_terminate(iser_conn);
/*
* if iser_conn and iscsi_conn are bound, we must wait for
@@ -673,14 +841,14 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
* the iser resources. Otherwise we are safe to free resources
* immediately.
*/
- if (ib_conn->iscsi_conn) {
- INIT_WORK(&ib_conn->release_work, iser_release_work);
- queue_work(release_wq, &ib_conn->release_work);
- mutex_unlock(&ib_conn->state_mutex);
+ if (iser_conn->iscsi_conn) {
+ INIT_WORK(&iser_conn->release_work, iser_release_work);
+ queue_work(release_wq, &iser_conn->release_work);
+ mutex_unlock(&iser_conn->state_mutex);
} else {
- ib_conn->state = ISER_CONN_DOWN;
- mutex_unlock(&ib_conn->state_mutex);
- iser_conn_release(ib_conn);
+ iser_conn->state = ISER_CONN_DOWN;
+ mutex_unlock(&iser_conn->state_mutex);
+ iser_conn_release(iser_conn);
}
iscsi_destroy_endpoint(ep);
}
@@ -843,7 +1011,7 @@ register_transport_failure:
static void __exit iser_exit(void)
{
- struct iser_conn *ib_conn, *n;
+ struct iser_conn *iser_conn, *n;
int connlist_empty;
iser_dbg("Removing iSER datamover...\n");
@@ -856,8 +1024,9 @@ static void __exit iser_exit(void)
if (!connlist_empty) {
iser_err("Error cleanup stage completed but we still have iser "
"connections, destroying them anyway.\n");
- list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) {
- iser_conn_release(ib_conn);
+ list_for_each_entry_safe(iser_conn, n, &ig.connlist,
+ conn_list) {
+ iser_conn_release(iser_conn);
}
}
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9f0e0e34d6ca..cd4174ca9a76 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,39 +69,38 @@
#define DRV_NAME "iser"
#define PFX DRV_NAME ": "
-#define DRV_VER "1.4.1"
+#define DRV_VER "1.4.8"
-#define iser_dbg(fmt, arg...) \
- do { \
- if (iser_debug_level > 2) \
- printk(KERN_DEBUG PFX "%s:" fmt,\
- __func__ , ## arg); \
+#define iser_dbg(fmt, arg...) \
+ do { \
+ if (iser_debug_level > 2) \
+ printk(KERN_DEBUG PFX "%s: " fmt,\
+ __func__ , ## arg); \
} while (0)
#define iser_warn(fmt, arg...) \
do { \
if (iser_debug_level > 0) \
- pr_warn(PFX "%s:" fmt, \
+ pr_warn(PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
#define iser_info(fmt, arg...) \
do { \
if (iser_debug_level > 1) \
- pr_info(PFX "%s:" fmt, \
+ pr_info(PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
#define iser_err(fmt, arg...) \
do { \
- printk(KERN_ERR PFX "%s:" fmt, \
+ printk(KERN_ERR PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
#define SHIFT_4K 12
#define SIZE_4K (1ULL << SHIFT_4K)
#define MASK_4K (~(SIZE_4K-1))
-
/* support up to 512KB in one RDMA */
#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K)
#define ISER_DEF_XMIT_CMDS_DEFAULT 512
@@ -145,18 +144,32 @@
ISER_MAX_TX_MISC_PDUS + \
ISER_MAX_RX_MISC_PDUS)
+#define ISER_WC_BATCH_COUNT 16
+#define ISER_SIGNAL_CMD_COUNT 32
+
#define ISER_VER 0x10
#define ISER_WSV 0x08
#define ISER_RSV 0x04
#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
+#define ISER_BEACON_WRID 0xfffffffffffffffeULL
+/**
+ * struct iser_hdr - iSER header
+ *
+ * @flags: flags support (zbva, remote_inv)
+ * @rsvd: reserved
+ * @write_stag: write rkey
+ * @write_va: write virtual address
+ * @reaf_stag: read rkey
+ * @read_va: read virtual address
+ */
struct iser_hdr {
u8 flags;
u8 rsvd[3];
- __be32 write_stag; /* write rkey */
+ __be32 write_stag;
__be64 write_va;
- __be32 read_stag; /* read rkey */
+ __be32 read_stag;
__be64 read_va;
} __attribute__((packed));
@@ -179,7 +192,7 @@ struct iser_cm_hdr {
/* Length of an object name string */
#define ISER_OBJECT_NAME_SIZE 64
-enum iser_ib_conn_state {
+enum iser_conn_state {
ISER_CONN_INIT, /* descriptor allocd, no conn */
ISER_CONN_PENDING, /* in the process of being established */
ISER_CONN_UP, /* up and running */
@@ -200,23 +213,42 @@ enum iser_data_dir {
ISER_DIRS_NUM
};
+/**
+ * struct iser_data_buf - iSER data buffer
+ *
+ * @buf: pointer to the sg list
+ * @size: num entries of this sg
+ * @data_len: total beffer byte len
+ * @dma_nents: returned by dma_map_sg
+ * @copy_buf: allocated copy buf for SGs unaligned
+ * for rdma which are copied
+ * @sg_single: SG-ified clone of a non SG SC or
+ * unaligned SG
+ */
struct iser_data_buf {
- void *buf; /* pointer to the sg list */
- unsigned int size; /* num entries of this sg */
- unsigned long data_len; /* total data len */
- unsigned int dma_nents; /* returned by dma_map_sg */
- char *copy_buf; /* allocated copy buf for SGs unaligned *
- * for rdma which are copied */
- struct scatterlist sg_single; /* SG-ified clone of a non SG SC or *
- * unaligned SG */
+ void *buf;
+ unsigned int size;
+ unsigned long data_len;
+ unsigned int dma_nents;
+ char *copy_buf;
+ struct scatterlist sg_single;
};
/* fwd declarations */
struct iser_device;
-struct iser_cq_desc;
struct iscsi_iser_task;
struct iscsi_endpoint;
+/**
+ * struct iser_mem_reg - iSER memory registration info
+ *
+ * @lkey: MR local key
+ * @rkey: MR remote key
+ * @va: MR start address (buffer va)
+ * @len: MR length
+ * @mem_h: pointer to registration context (FMR/Fastreg)
+ * @is_mr: indicates weather we registered the buffer
+ */
struct iser_mem_reg {
u32 lkey;
u32 rkey;
@@ -226,11 +258,20 @@ struct iser_mem_reg {
int is_mr;
};
+/**
+ * struct iser_regd_buf - iSER buffer registration desc
+ *
+ * @reg: memory registration info
+ * @virt_addr: virtual address of buffer
+ * @device: reference to iser device
+ * @direction: dma direction (for dma_unmap)
+ * @data_size: data buffer size in bytes
+ */
struct iser_regd_buf {
- struct iser_mem_reg reg; /* memory registration info */
+ struct iser_mem_reg reg;
void *virt_addr;
- struct iser_device *device; /* device->device for dma_unmap */
- enum dma_data_direction direction; /* direction for dma_unmap */
+ struct iser_device *device;
+ enum dma_data_direction direction;
unsigned int data_size;
};
@@ -240,19 +281,39 @@ enum iser_desc_type {
ISCSI_TX_DATAOUT
};
+/**
+ * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ *
+ * @iser_header: iser header
+ * @iscsi_header: iscsi header
+ * @type: command/control/dataout
+ * @dam_addr: header buffer dma_address
+ * @tx_sg: sg[0] points to iser/iscsi headers
+ * sg[1] optionally points to either of immediate data
+ * unsolicited data-out or control
+ * @num_sge: number sges used on this TX task
+ */
struct iser_tx_desc {
struct iser_hdr iser_header;
struct iscsi_hdr iscsi_header;
enum iser_desc_type type;
u64 dma_addr;
- /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either
- of immediate data, unsolicited data-out or control (login,text) */
struct ib_sge tx_sg[2];
int num_sge;
};
#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
sizeof(u64) + sizeof(struct ib_sge)))
+/**
+ * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ *
+ * @iser_header: iser header
+ * @iscsi_header: iscsi header
+ * @data: received data segment
+ * @dma_addr: receive buffer dma address
+ * @rx_sg: ib_sge of receive buffer
+ * @pad: for sense data TODO: Modify to maximum sense length supported
+ */
struct iser_rx_desc {
struct iser_hdr iser_header;
struct iscsi_hdr iscsi_header;
@@ -265,25 +326,59 @@ struct iser_rx_desc {
#define ISER_MAX_CQ 4
struct iser_conn;
+struct ib_conn;
struct iscsi_iser_task;
+/**
+ * struct iser_comp - iSER completion context
+ *
+ * @device: pointer to device handle
+ * @cq: completion queue
+ * @wcs: work completion array
+ * @tasklet: Tasklet handle
+ * @active_qps: Number of active QPs attached
+ * to completion context
+ */
+struct iser_comp {
+ struct iser_device *device;
+ struct ib_cq *cq;
+ struct ib_wc wcs[ISER_WC_BATCH_COUNT];
+ struct tasklet_struct tasklet;
+ int active_qps;
+};
+
+/**
+ * struct iser_device - iSER device handle
+ *
+ * @ib_device: RDMA device
+ * @pd: Protection Domain for this device
+ * @dev_attr: Device attributes container
+ * @mr: Global DMA memory region
+ * @event_handler: IB events handle routine
+ * @ig_list: entry in devices list
+ * @refcount: Reference counter, dominated by open iser connections
+ * @comps_used: Number of completion contexts used, Min between online
+ * cpus and device max completion vectors
+ * @comps: Dinamically allocated array of completion handlers
+ * Memory registration pool Function pointers (FMR or Fastreg):
+ * @iser_alloc_rdma_reg_res: Allocation of memory regions pool
+ * @iser_free_rdma_reg_res: Free of memory regions pool
+ * @iser_reg_rdma_mem: Memory registration routine
+ * @iser_unreg_rdma_mem: Memory deregistration routine
+ */
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
struct ib_device_attr dev_attr;
- struct ib_cq *rx_cq[ISER_MAX_CQ];
- struct ib_cq *tx_cq[ISER_MAX_CQ];
struct ib_mr *mr;
- struct tasklet_struct cq_tasklet[ISER_MAX_CQ];
struct ib_event_handler event_handler;
- struct list_head ig_list; /* entry in ig devices list */
+ struct list_head ig_list;
int refcount;
- int cq_active_qps[ISER_MAX_CQ];
- int cqs_used;
- struct iser_cq_desc *cq_desc;
- int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn,
+ int comps_used;
+ struct iser_comp comps[ISER_MAX_CQ];
+ int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn,
unsigned cmds_max);
- void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn);
+ void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn);
int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
@@ -301,78 +396,160 @@ enum iser_reg_indicator {
ISER_FASTREG_PROTECTED = 1 << 3,
};
+/**
+ * struct iser_pi_context - Protection information context
+ *
+ * @prot_mr: protection memory region
+ * @prot_frpl: protection fastreg page list
+ * @sig_mr: signature feature enabled memory region
+ */
struct iser_pi_context {
struct ib_mr *prot_mr;
struct ib_fast_reg_page_list *prot_frpl;
struct ib_mr *sig_mr;
};
+/**
+ * struct fast_reg_descriptor - Fast registration descriptor
+ *
+ * @list: entry in connection fastreg pool
+ * @data_mr: data memory region
+ * @data_frpl: data fastreg page list
+ * @pi_ctx: protection information context
+ * @reg_indicators: fast registration indicators
+ */
struct fast_reg_descriptor {
struct list_head list;
- /* For fast registration - FRWR */
struct ib_mr *data_mr;
struct ib_fast_reg_page_list *data_frpl;
struct iser_pi_context *pi_ctx;
- /* registration indicators container */
u8 reg_indicators;
};
+/**
+ * struct ib_conn - Infiniband related objects
+ *
+ * @cma_id: rdma_cm connection maneger handle
+ * @qp: Connection Queue-pair
+ * @post_recv_buf_count: post receive counter
+ * @rx_wr: receive work request for batch posts
+ * @device: reference to iser device
+ * @comp: iser completion context
+ * @pi_support: Indicate device T10-PI support
+ * @beacon: beacon send wr to signal all flush errors were drained
+ * @flush_comp: completes when all connection completions consumed
+ * @lock: protects fmr/fastreg pool
+ * @union.fmr:
+ * @pool: FMR pool for fast registrations
+ * @page_vec: page vector to hold mapped commands pages
+ * used for registration
+ * @union.fastreg:
+ * @pool: Fast registration descriptors pool for fast
+ * registrations
+ * @pool_size: Size of pool
+ */
+struct ib_conn {
+ struct rdma_cm_id *cma_id;
+ struct ib_qp *qp;
+ int post_recv_buf_count;
+ struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
+ struct iser_device *device;
+ struct iser_comp *comp;
+ bool pi_support;
+ struct ib_send_wr beacon;
+ struct completion flush_comp;
+ spinlock_t lock;
+ union {
+ struct {
+ struct ib_fmr_pool *pool;
+ struct iser_page_vec *page_vec;
+ } fmr;
+ struct {
+ struct list_head pool;
+ int pool_size;
+ } fastreg;
+ };
+};
+
+/**
+ * struct iser_conn - iSER connection context
+ *
+ * @ib_conn: connection RDMA resources
+ * @iscsi_conn: link to matching iscsi connection
+ * @ep: transport handle
+ * @state: connection logical state
+ * @qp_max_recv_dtos: maximum number of data outs, corresponds
+ * to max number of post recvs
+ * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1)
+ * @min_posted_rx: (qp_max_recv_dtos >> 2)
+ * @name: connection peer portal
+ * @release_work: deffered work for release job
+ * @state_mutex: protects iser onnection state
+ * @stop_completion: conn_stop completion
+ * @ib_completion: RDMA cleanup completion
+ * @up_completion: connection establishment completed
+ * (state is ISER_CONN_UP)
+ * @conn_list: entry in ig conn list
+ * @login_buf: login data buffer (stores login parameters)
+ * @login_req_buf: login request buffer
+ * @login_req_dma: login request buffer dma address
+ * @login_resp_buf: login response buffer
+ * @login_resp_dma: login response buffer dma address
+ * @rx_desc_head: head of rx_descs cyclic buffer
+ * @rx_descs: rx buffers array (cyclic buffer)
+ * @num_rx_descs: number of rx descriptors
+ */
struct iser_conn {
+ struct ib_conn ib_conn;
struct iscsi_conn *iscsi_conn;
struct iscsi_endpoint *ep;
- enum iser_ib_conn_state state; /* rdma connection state */
- atomic_t refcount;
- spinlock_t lock; /* used for state changes */
- struct iser_device *device; /* device context */
- struct rdma_cm_id *cma_id; /* CMA ID */
- struct ib_qp *qp; /* QP */
- unsigned qp_max_recv_dtos; /* num of rx buffers */
- unsigned qp_max_recv_dtos_mask; /* above minus 1 */
- unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */
- int post_recv_buf_count; /* posted rx count */
- atomic_t post_send_buf_count; /* posted tx count */
+ enum iser_conn_state state;
+ unsigned qp_max_recv_dtos;
+ unsigned qp_max_recv_dtos_mask;
+ unsigned min_posted_rx;
char name[ISER_OBJECT_NAME_SIZE];
struct work_struct release_work;
- struct completion stop_completion;
struct mutex state_mutex;
- struct completion flush_completion;
+ struct completion stop_completion;
+ struct completion ib_completion;
struct completion up_completion;
- struct list_head conn_list; /* entry in ig conn list */
+ struct list_head conn_list;
char *login_buf;
char *login_req_buf, *login_resp_buf;
u64 login_req_dma, login_resp_dma;
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
- struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
- bool pi_support;
-
- /* Connection memory registration pool */
- union {
- struct {
- struct ib_fmr_pool *pool; /* pool of IB FMRs */
- struct iser_page_vec *page_vec; /* represents SG to fmr maps*
- * maps serialized as tx is*/
- } fmr;
- struct {
- struct list_head pool;
- int pool_size;
- } fastreg;
- };
+ u32 num_rx_descs;
};
+/**
+ * struct iscsi_iser_task - iser task context
+ *
+ * @desc: TX descriptor
+ * @iser_conn: link to iser connection
+ * @status: current task status
+ * @sc: link to scsi command
+ * @command_sent: indicate if command was sent
+ * @dir: iser data direction
+ * @rdma_regd: task rdma registration desc
+ * @data: iser data buffer desc
+ * @data_copy: iser data copy buffer desc (bounce buffer)
+ * @prot: iser protection buffer desc
+ * @prot_copy: iser protection copy buffer desc (bounce buffer)
+ */
struct iscsi_iser_task {
struct iser_tx_desc desc;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
enum iser_task_status status;
struct scsi_cmnd *sc;
- int command_sent; /* set if command sent */
- int dir[ISER_DIRS_NUM]; /* set if dir use*/
- struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
- struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
- struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
- struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */
- struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */
+ int command_sent;
+ int dir[ISER_DIRS_NUM];
+ struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];
+ struct iser_data_buf data[ISER_DIRS_NUM];
+ struct iser_data_buf data_copy[ISER_DIRS_NUM];
+ struct iser_data_buf prot[ISER_DIRS_NUM];
+ struct iser_data_buf prot_copy[ISER_DIRS_NUM];
};
struct iser_page_vec {
@@ -382,17 +559,20 @@ struct iser_page_vec {
int data_size;
};
-struct iser_cq_desc {
- struct iser_device *device;
- int cq_index;
-};
-
+/**
+ * struct iser_global: iSER global context
+ *
+ * @device_list_mutex: protects device_list
+ * @device_list: iser devices global list
+ * @connlist_mutex: protects connlist
+ * @connlist: iser connections global list
+ * @desc_cache: kmem cache for tx dataout
+ */
struct iser_global {
- struct mutex device_list_mutex;/* */
- struct list_head device_list; /* all iSER devices */
+ struct mutex device_list_mutex;
+ struct list_head device_list;
struct mutex connlist_mutex;
- struct list_head connlist; /* all iSER IB connections */
-
+ struct list_head connlist;
struct kmem_cache *desc_cache;
};
@@ -401,9 +581,6 @@ extern int iser_debug_level;
extern bool iser_pi_enable;
extern int iser_pi_guard;
-/* allocate connection resources needed for rdma functionality */
-int iser_conn_set_full_featured_mode(struct iscsi_conn *conn);
-
int iser_send_control(struct iscsi_conn *conn,
struct iscsi_task *task);
@@ -415,29 +592,30 @@ int iser_send_data_out(struct iscsi_conn *conn,
struct iscsi_data *hdr);
void iscsi_iser_recv(struct iscsi_conn *conn,
- struct iscsi_hdr *hdr,
- char *rx_data,
- int rx_data_len);
+ struct iscsi_hdr *hdr,
+ char *rx_data,
+ int rx_data_len);
-void iser_conn_init(struct iser_conn *ib_conn);
+void iser_conn_init(struct iser_conn *iser_conn);
-void iser_conn_release(struct iser_conn *ib_conn);
+void iser_conn_release(struct iser_conn *iser_conn);
-void iser_conn_terminate(struct iser_conn *ib_conn);
+int iser_conn_terminate(struct iser_conn *iser_conn);
void iser_release_work(struct work_struct *work);
void iser_rcv_completion(struct iser_rx_desc *desc,
- unsigned long dto_xfer_len,
- struct iser_conn *ib_conn);
+ unsigned long dto_xfer_len,
+ struct ib_conn *ib_conn);
-void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn);
+void iser_snd_completion(struct iser_tx_desc *desc,
+ struct ib_conn *ib_conn);
void iser_task_rdma_init(struct iscsi_iser_task *task);
void iser_task_rdma_finalize(struct iscsi_iser_task *task);
-void iser_free_rx_descriptors(struct iser_conn *ib_conn);
+void iser_free_rx_descriptors(struct iser_conn *iser_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
@@ -449,38 +627,40 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
-int iser_connect(struct iser_conn *ib_conn,
- struct sockaddr *src_addr,
- struct sockaddr *dst_addr,
- int non_blocking);
+int iser_connect(struct iser_conn *iser_conn,
+ struct sockaddr *src_addr,
+ struct sockaddr *dst_addr,
+ int non_blocking);
-int iser_reg_page_vec(struct iser_conn *ib_conn,
+int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
- struct iser_mem_reg *mem_reg);
+ struct iser_mem_reg *mem_reg);
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
-int iser_post_recvl(struct iser_conn *ib_conn);
-int iser_post_recvm(struct iser_conn *ib_conn, int count);
-int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc);
+int iser_post_recvl(struct iser_conn *iser_conn);
+int iser_post_recvm(struct iser_conn *iser_conn, int count);
+int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
+ bool signal);
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *data,
- enum iser_data_dir iser_dir,
- enum dma_data_direction dma_dir);
+ struct iser_data_buf *data,
+ enum iser_data_dir iser_dir,
+ enum dma_data_direction dma_dir);
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data);
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
-int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
-void iser_free_fmr_pool(struct iser_conn *ib_conn);
-int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max);
-void iser_free_fastreg_pool(struct iser_conn *ib_conn);
+int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
+ struct iscsi_session *session);
+int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+void iser_free_fmr_pool(struct ib_conn *ib_conn);
+int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+void iser_free_fastreg_pool(struct ib_conn *ib_conn);
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 8d44a4060634..5a489ea63732 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -49,7 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_device *device = iser_task->ib_conn->device;
+ struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -103,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
unsigned int edtl)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_device *device = iser_task->ib_conn->device;
+ struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -160,10 +160,10 @@ iser_prepare_write_cmd(struct iscsi_task *task,
}
/* creates a new tx descriptor and adds header regd buffer */
-static void iser_create_send_desc(struct iser_conn *ib_conn,
+static void iser_create_send_desc(struct iser_conn *iser_conn,
struct iser_tx_desc *tx_desc)
{
- struct iser_device *device = ib_conn->device;
+ struct iser_device *device = iser_conn->ib_conn.device;
ib_dma_sync_single_for_cpu(device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -179,103 +179,108 @@ static void iser_create_send_desc(struct iser_conn *ib_conn,
}
}
-static void iser_free_login_buf(struct iser_conn *ib_conn)
+static void iser_free_login_buf(struct iser_conn *iser_conn)
{
- if (!ib_conn->login_buf)
+ struct iser_device *device = iser_conn->ib_conn.device;
+
+ if (!iser_conn->login_buf)
return;
- if (ib_conn->login_req_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_req_dma,
+ if (iser_conn->login_req_dma)
+ ib_dma_unmap_single(device->ib_device,
+ iser_conn->login_req_dma,
ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (ib_conn->login_resp_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_resp_dma,
+ if (iser_conn->login_resp_dma)
+ ib_dma_unmap_single(device->ib_device,
+ iser_conn->login_resp_dma,
ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->login_buf);
+ kfree(iser_conn->login_buf);
/* make sure we never redo any unmapping */
- ib_conn->login_req_dma = 0;
- ib_conn->login_resp_dma = 0;
- ib_conn->login_buf = NULL;
+ iser_conn->login_req_dma = 0;
+ iser_conn->login_resp_dma = 0;
+ iser_conn->login_buf = NULL;
}
-static int iser_alloc_login_buf(struct iser_conn *ib_conn)
+static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
- struct iser_device *device;
+ struct iser_device *device = iser_conn->ib_conn.device;
int req_err, resp_err;
- BUG_ON(ib_conn->device == NULL);
+ BUG_ON(device == NULL);
- device = ib_conn->device;
-
- ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
+ iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!ib_conn->login_buf)
+ if (!iser_conn->login_buf)
goto out_err;
- ib_conn->login_req_buf = ib_conn->login_buf;
- ib_conn->login_resp_buf = ib_conn->login_buf +
+ iser_conn->login_req_buf = iser_conn->login_buf;
+ iser_conn->login_resp_buf = iser_conn->login_buf +
ISCSI_DEF_MAX_RECV_SEG_LEN;
- ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+ iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
+ iser_conn->login_req_buf,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
- ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+ iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
+ iser_conn->login_resp_buf,
+ ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
req_err = ib_dma_mapping_error(device->ib_device,
- ib_conn->login_req_dma);
+ iser_conn->login_req_dma);
resp_err = ib_dma_mapping_error(device->ib_device,
- ib_conn->login_resp_dma);
+ iser_conn->login_resp_dma);
if (req_err || resp_err) {
if (req_err)
- ib_conn->login_req_dma = 0;
+ iser_conn->login_req_dma = 0;
if (resp_err)
- ib_conn->login_resp_dma = 0;
+ iser_conn->login_resp_dma = 0;
goto free_login_buf;
}
return 0;
free_login_buf:
- iser_free_login_buf(ib_conn);
+ iser_free_login_buf(iser_conn);
out_err:
iser_err("unable to alloc or map login buf\n");
return -ENOMEM;
}
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session)
+int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
+ struct iscsi_session *session)
{
int i, j;
u64 dma_addr;
struct iser_rx_desc *rx_desc;
struct ib_sge *rx_sg;
- struct iser_device *device = ib_conn->device;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
- ib_conn->qp_max_recv_dtos = session->cmds_max;
- ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
- ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2;
+ iser_conn->qp_max_recv_dtos = session->cmds_max;
+ iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
+ iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
goto create_rdma_reg_res_failed;
- if (iser_alloc_login_buf(ib_conn))
+ if (iser_alloc_login_buf(iser_conn))
goto alloc_login_buf_fail;
- ib_conn->rx_descs = kmalloc(session->cmds_max *
+ iser_conn->num_rx_descs = session->cmds_max;
+ iser_conn->rx_descs = kmalloc(iser_conn->num_rx_descs *
sizeof(struct iser_rx_desc), GFP_KERNEL);
- if (!ib_conn->rx_descs)
+ if (!iser_conn->rx_descs)
goto rx_desc_alloc_fail;
- rx_desc = ib_conn->rx_descs;
+ rx_desc = iser_conn->rx_descs;
- for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) {
+ for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(device->ib_device, dma_addr))
@@ -289,18 +294,18 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *s
rx_sg->lkey = device->mr->lkey;
}
- ib_conn->rx_desc_head = 0;
+ iser_conn->rx_desc_head = 0;
return 0;
rx_desc_dma_map_failed:
- rx_desc = ib_conn->rx_descs;
+ rx_desc = iser_conn->rx_descs;
for (j = 0; j < i; j++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->rx_descs);
- ib_conn->rx_descs = NULL;
+ kfree(iser_conn->rx_descs);
+ iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
- iser_free_login_buf(ib_conn);
+ iser_free_login_buf(iser_conn);
alloc_login_buf_fail:
device->iser_free_rdma_reg_res(ib_conn);
create_rdma_reg_res_failed:
@@ -308,33 +313,35 @@ create_rdma_reg_res_failed:
return -ENOMEM;
}
-void iser_free_rx_descriptors(struct iser_conn *ib_conn)
+void iser_free_rx_descriptors(struct iser_conn *iser_conn)
{
int i;
struct iser_rx_desc *rx_desc;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
- if (!ib_conn->rx_descs)
+ if (!iser_conn->rx_descs)
goto free_login_buf;
if (device->iser_free_rdma_reg_res)
device->iser_free_rdma_reg_res(ib_conn);
- rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)
+ rx_desc = iser_conn->rx_descs;
+ for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->rx_descs);
+ kfree(iser_conn->rx_descs);
/* make sure we never redo any unmapping */
- ib_conn->rx_descs = NULL;
+ iser_conn->rx_descs = NULL;
free_login_buf:
- iser_free_login_buf(ib_conn);
+ iser_free_login_buf(iser_conn);
}
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
{
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct iscsi_session *session = conn->session;
iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
@@ -343,34 +350,37 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
return 0;
/*
- * Check that there is one posted recv buffer (for the last login
- * response) and no posted send buffers left - they must have been
- * consumed during previous login phases.
+ * Check that there is one posted recv buffer
+ * (for the last login response).
*/
WARN_ON(ib_conn->post_recv_buf_count != 1);
- WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0);
if (session->discovery_sess) {
iser_info("Discovery session, re-using login RX buffer\n");
return 0;
} else
iser_info("Normal session, posting batch of RX %d buffers\n",
- ib_conn->min_posted_rx);
+ iser_conn->min_posted_rx);
/* Initial post receive buffers */
- if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx))
+ if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx))
return -ENOMEM;
return 0;
}
+static inline bool iser_signal_comp(int sig_count)
+{
+ return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
+}
+
/**
* iser_send_command - send command PDU
*/
int iser_send_command(struct iscsi_conn *conn,
struct iscsi_task *task)
{
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
unsigned long edtl;
int err;
@@ -378,12 +388,13 @@ int iser_send_command(struct iscsi_conn *conn,
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
struct scsi_cmnd *sc = task->sc;
struct iser_tx_desc *tx_desc = &iser_task->desc;
+ static unsigned sig_count;
edtl = ntohl(hdr->data_length);
/* build the tx desc regd header and add it to the tx desc dto */
tx_desc->type = ISCSI_TX_SCSI_COMMAND;
- iser_create_send_desc(ib_conn, tx_desc);
+ iser_create_send_desc(iser_conn, tx_desc);
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
data_buf = &iser_task->data[ISER_DIR_IN];
@@ -423,7 +434,8 @@ int iser_send_command(struct iscsi_conn *conn,
iser_task->status = ISER_TASK_STATUS_STARTED;
- err = iser_post_send(ib_conn, tx_desc);
+ err = iser_post_send(&iser_conn->ib_conn, tx_desc,
+ iser_signal_comp(++sig_count));
if (!err)
return 0;
@@ -439,7 +451,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
struct iscsi_task *task,
struct iscsi_data *hdr)
{
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *tx_desc = NULL;
struct iser_regd_buf *regd_buf;
@@ -488,7 +500,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
itt, buf_offset, data_seg_len);
- err = iser_post_send(ib_conn, tx_desc);
+ err = iser_post_send(&iser_conn->ib_conn, tx_desc, true);
if (!err)
return 0;
@@ -501,7 +513,7 @@ send_data_out_error:
int iser_send_control(struct iscsi_conn *conn,
struct iscsi_task *task)
{
- struct iser_conn *ib_conn = conn->dd_data;
+ struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *mdesc = &iser_task->desc;
unsigned long data_seg_len;
@@ -510,9 +522,9 @@ int iser_send_control(struct iscsi_conn *conn,
/* build the tx desc regd header and add it to the tx desc dto */
mdesc->type = ISCSI_TX_CONTROL;
- iser_create_send_desc(ib_conn, mdesc);
+ iser_create_send_desc(iser_conn, mdesc);
- device = ib_conn->device;
+ device = iser_conn->ib_conn.device;
data_seg_len = ntoh24(task->hdr->dlength);
@@ -524,16 +536,16 @@ int iser_send_control(struct iscsi_conn *conn,
}
ib_dma_sync_single_for_cpu(device->ib_device,
- ib_conn->login_req_dma, task->data_count,
+ iser_conn->login_req_dma, task->data_count,
DMA_TO_DEVICE);
- memcpy(ib_conn->login_req_buf, task->data, task->data_count);
+ memcpy(iser_conn->login_req_buf, task->data, task->data_count);
ib_dma_sync_single_for_device(device->ib_device,
- ib_conn->login_req_dma, task->data_count,
+ iser_conn->login_req_dma, task->data_count,
DMA_TO_DEVICE);
- tx_dsg->addr = ib_conn->login_req_dma;
+ tx_dsg->addr = iser_conn->login_req_dma;
tx_dsg->length = task->data_count;
tx_dsg->lkey = device->mr->lkey;
mdesc->num_sge = 2;
@@ -542,7 +554,7 @@ int iser_send_control(struct iscsi_conn *conn,
if (task == conn->login_task) {
iser_dbg("op %x dsl %lx, posting login rx buffer\n",
task->hdr->opcode, data_seg_len);
- err = iser_post_recvl(ib_conn);
+ err = iser_post_recvl(iser_conn);
if (err)
goto send_control_error;
err = iser_post_rx_bufs(conn, task->hdr);
@@ -550,7 +562,7 @@ int iser_send_control(struct iscsi_conn *conn,
goto send_control_error;
}
- err = iser_post_send(ib_conn, mdesc);
+ err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
if (!err)
return 0;
@@ -564,15 +576,17 @@ send_control_error:
*/
void iser_rcv_completion(struct iser_rx_desc *rx_desc,
unsigned long rx_xfer_len,
- struct iser_conn *ib_conn)
+ struct ib_conn *ib_conn)
{
+ struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+ ib_conn);
struct iscsi_hdr *hdr;
u64 rx_dma;
int rx_buflen, outstanding, count, err;
/* differentiate between login to all other PDUs */
- if ((char *)rx_desc == ib_conn->login_resp_buf) {
- rx_dma = ib_conn->login_resp_dma;
+ if ((char *)rx_desc == iser_conn->login_resp_buf) {
+ rx_dma = iser_conn->login_resp_dma;
rx_buflen = ISER_RX_LOGIN_SIZE;
} else {
rx_dma = rx_desc->dma_addr;
@@ -580,14 +594,14 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
}
ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ rx_buflen, DMA_FROM_DEVICE);
hdr = &rx_desc->iscsi_header;
iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
- iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data,
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
rx_xfer_len - ISER_HEADERS_LEN);
ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
@@ -599,21 +613,21 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
* for the posted rx bufs refcount to become zero handles everything */
ib_conn->post_recv_buf_count--;
- if (rx_dma == ib_conn->login_resp_dma)
+ if (rx_dma == iser_conn->login_resp_dma)
return;
outstanding = ib_conn->post_recv_buf_count;
- if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) {
- count = min(ib_conn->qp_max_recv_dtos - outstanding,
- ib_conn->min_posted_rx);
- err = iser_post_recvm(ib_conn, count);
+ if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
+ count = min(iser_conn->qp_max_recv_dtos - outstanding,
+ iser_conn->min_posted_rx);
+ err = iser_post_recvm(iser_conn, count);
if (err)
iser_err("posting %d rx bufs err %d\n", count, err);
}
}
void iser_snd_completion(struct iser_tx_desc *tx_desc,
- struct iser_conn *ib_conn)
+ struct ib_conn *ib_conn)
{
struct iscsi_task *task;
struct iser_device *device = ib_conn->device;
@@ -625,8 +639,6 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc,
tx_desc = NULL;
}
- atomic_dec(&ib_conn->post_send_buf_count);
-
if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
/* this arithmetic is legal by libiscsi dd_data allocation */
task = (void *) ((long)(void *)tx_desc -
@@ -658,7 +670,7 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
- struct iser_device *device = iser_task->ib_conn->device;
+ struct iser_device *device = iser_task->iser_conn->ib_conn.device;
int is_rdma_data_aligned = 1;
int is_rdma_prot_aligned = 1;
int prot_count = scsi_prot_sg_count(iser_task->sc);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 47acd3ad3a17..6c5ce357fba6 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -49,7 +49,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data_copy,
enum iser_data_dir cmd_dir)
{
- struct ib_device *dev = iser_task->ib_conn->device->ib_device;
+ struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
struct scatterlist *sgl = (struct scatterlist *)data->buf;
struct scatterlist *sg;
char *mem = NULL;
@@ -116,7 +116,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct ib_device *dev;
unsigned long cmd_data_len;
- dev = iser_task->ib_conn->device->ib_device;
+ dev = iser_task->iser_conn->ib_conn.device->ib_device;
ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
(cmd_dir == ISER_DIR_OUT) ?
@@ -322,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
struct ib_device *dev;
iser_task->dir[iser_dir] = 1;
- dev = iser_task->ib_conn->device->ib_device;
+ dev = iser_task->iser_conn->ib_conn.device->ib_device;
data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
if (data->dma_nents == 0) {
@@ -337,7 +337,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
{
struct ib_device *dev;
- dev = iser_task->ib_conn->device->ib_device;
+ dev = iser_task->iser_conn->ib_conn.device->ib_device;
ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
}
@@ -348,7 +348,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir,
int aligned_len)
{
- struct iscsi_conn *iscsi_conn = iser_task->ib_conn->iscsi_conn;
+ struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
iscsi_conn->fmr_unalign_cnt++;
iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
@@ -377,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir];
@@ -432,7 +432,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
ib_conn->fmr.page_vec->offset);
for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
iser_err("page_vec[%d] = 0x%llx\n", i,
- (unsigned long long) ib_conn->fmr.page_vec->pages[i]);
+ (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
}
if (err)
return err;
@@ -440,77 +440,74 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
return 0;
}
-static inline enum ib_t10_dif_type
-scsi2ib_prot_type(unsigned char prot_type)
+static inline void
+iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
+ struct ib_sig_domain *domain)
{
- switch (prot_type) {
- case SCSI_PROT_DIF_TYPE0:
- return IB_T10DIF_NONE;
- case SCSI_PROT_DIF_TYPE1:
- return IB_T10DIF_TYPE1;
- case SCSI_PROT_DIF_TYPE2:
- return IB_T10DIF_TYPE2;
- case SCSI_PROT_DIF_TYPE3:
- return IB_T10DIF_TYPE3;
- default:
- return IB_T10DIF_NONE;
- }
-}
-
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.pi_interval = sc->device->sector_size;
+ domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff;
+ /*
+ * At the moment we hard code those, but in the future
+ * we will take them from sc.
+ */
+ domain->sig.dif.apptag_check_mask = 0xffff;
+ domain->sig.dif.app_escape = true;
+ domain->sig.dif.ref_escape = true;
+ if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 ||
+ scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2)
+ domain->sig.dif.ref_remap = true;
+};
static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
- unsigned char scsi_ptype = scsi_get_prot_type(sc);
-
- sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
- sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
- sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size;
- sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size;
-
switch (scsi_get_prot_op(sc)) {
case SCSI_PROT_WRITE_INSERT:
case SCSI_PROT_READ_STRIP:
- sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
- sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+ sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
+ iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
- 0xffffffff;
break;
case SCSI_PROT_READ_INSERT:
case SCSI_PROT_WRITE_STRIP:
- sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
- sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
- 0xffffffff;
- sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+ iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
+ /*
+ * At the moment we use this modparam to tell what is
+ * the memory bg_type, in the future we will take it
+ * from sc.
+ */
+ sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
+ IB_T10DIF_CRC;
break;
case SCSI_PROT_READ_PASS:
case SCSI_PROT_WRITE_PASS:
- sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
- sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
- 0xffffffff;
- sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+ iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
- 0xffffffff;
+ iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
+ /*
+ * At the moment we use this modparam to tell what is
+ * the memory bg_type, in the future we will take it
+ * from sc.
+ */
+ sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
+ IB_T10DIF_CRC;
break;
default:
iser_err("Unsupported PI operation %d\n",
scsi_get_prot_op(sc));
return -EINVAL;
}
+
return 0;
}
-
static int
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
switch (scsi_get_prot_type(sc)) {
case SCSI_PROT_DIF_TYPE0:
- *mask = 0x0;
break;
case SCSI_PROT_DIF_TYPE1:
case SCSI_PROT_DIF_TYPE2:
@@ -533,7 +530,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
struct ib_sge *prot_sge, struct ib_sge *sig_sge)
{
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_pi_context *pi_ctx = desc->pi_ctx;
struct ib_send_wr sig_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
@@ -609,7 +606,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct ib_sge *sge)
{
struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct ib_mr *mr;
@@ -700,7 +697,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir];
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 3bfec4bbda52..67225bb82bb5 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -39,8 +39,12 @@
#include "iscsi_iser.h"
#define ISCSI_ISER_MAX_CONN 8
-#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
-#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_RX_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_TX_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
+ ISCSI_ISER_MAX_CONN)
+
+static int iser_cq_poll_limit = 512;
static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
@@ -71,7 +75,6 @@ static void iser_event_handler(struct ib_event_handler *handler,
*/
static int iser_create_device_ib_res(struct iser_device *device)
{
- struct iser_cq_desc *cq_desc;
struct ib_device_attr *dev_attr = &device->dev_attr;
int ret, i;
@@ -101,51 +104,35 @@ static int iser_create_device_ib_res(struct iser_device *device)
return -1;
}
- device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
+ device->comps_used = min(ISER_MAX_CQ,
+ device->ib_device->num_comp_vectors);
iser_info("using %d CQs, device %s supports %d vectors\n",
- device->cqs_used, device->ib_device->name,
+ device->comps_used, device->ib_device->name,
device->ib_device->num_comp_vectors);
- device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
- GFP_KERNEL);
- if (device->cq_desc == NULL)
- goto cq_desc_err;
- cq_desc = device->cq_desc;
-
device->pd = ib_alloc_pd(device->ib_device);
if (IS_ERR(device->pd))
goto pd_err;
- for (i = 0; i < device->cqs_used; i++) {
- cq_desc[i].device = device;
- cq_desc[i].cq_index = i;
-
- device->rx_cq[i] = ib_create_cq(device->ib_device,
- iser_cq_callback,
- iser_cq_event_callback,
- (void *)&cq_desc[i],
- ISER_MAX_RX_CQ_LEN, i);
- if (IS_ERR(device->rx_cq[i])) {
- device->rx_cq[i] = NULL;
+ for (i = 0; i < device->comps_used; i++) {
+ struct iser_comp *comp = &device->comps[i];
+
+ comp->device = device;
+ comp->cq = ib_create_cq(device->ib_device,
+ iser_cq_callback,
+ iser_cq_event_callback,
+ (void *)comp,
+ ISER_MAX_CQ_LEN, i);
+ if (IS_ERR(comp->cq)) {
+ comp->cq = NULL;
goto cq_err;
}
- device->tx_cq[i] = ib_create_cq(device->ib_device,
- NULL, iser_cq_event_callback,
- (void *)&cq_desc[i],
- ISER_MAX_TX_CQ_LEN, i);
-
- if (IS_ERR(device->tx_cq[i])) {
- device->tx_cq[i] = NULL;
+ if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
goto cq_err;
- }
- if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
- goto cq_err;
-
- tasklet_init(&device->cq_tasklet[i],
- iser_cq_tasklet_fn,
- (unsigned long)&cq_desc[i]);
+ tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
+ (unsigned long)comp);
}
device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
@@ -164,19 +151,17 @@ static int iser_create_device_ib_res(struct iser_device *device)
handler_err:
ib_dereg_mr(device->mr);
dma_mr_err:
- for (i = 0; i < device->cqs_used; i++)
- tasklet_kill(&device->cq_tasklet[i]);
+ for (i = 0; i < device->comps_used; i++)
+ tasklet_kill(&device->comps[i].tasklet);
cq_err:
- for (i = 0; i < device->cqs_used; i++) {
- if (device->tx_cq[i])
- ib_destroy_cq(device->tx_cq[i]);
- if (device->rx_cq[i])
- ib_destroy_cq(device->rx_cq[i]);
+ for (i = 0; i < device->comps_used; i++) {
+ struct iser_comp *comp = &device->comps[i];
+
+ if (comp->cq)
+ ib_destroy_cq(comp->cq);
}
ib_dealloc_pd(device->pd);
pd_err:
- kfree(device->cq_desc);
-cq_desc_err:
iser_err("failed to allocate an IB resource\n");
return -1;
}
@@ -190,20 +175,18 @@ static void iser_free_device_ib_res(struct iser_device *device)
int i;
BUG_ON(device->mr == NULL);
- for (i = 0; i < device->cqs_used; i++) {
- tasklet_kill(&device->cq_tasklet[i]);
- (void)ib_destroy_cq(device->tx_cq[i]);
- (void)ib_destroy_cq(device->rx_cq[i]);
- device->tx_cq[i] = NULL;
- device->rx_cq[i] = NULL;
+ for (i = 0; i < device->comps_used; i++) {
+ struct iser_comp *comp = &device->comps[i];
+
+ tasklet_kill(&comp->tasklet);
+ ib_destroy_cq(comp->cq);
+ comp->cq = NULL;
}
(void)ib_unregister_event_handler(&device->event_handler);
(void)ib_dereg_mr(device->mr);
(void)ib_dealloc_pd(device->pd);
- kfree(device->cq_desc);
-
device->mr = NULL;
device->pd = NULL;
}
@@ -213,7 +196,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
*
* returns 0 on success, or errno code on failure
*/
-int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
struct iser_device *device = ib_conn->device;
struct ib_fmr_pool_param params;
@@ -263,7 +246,7 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
/**
* iser_free_fmr_pool - releases the FMR pool and page vec
*/
-void iser_free_fmr_pool(struct iser_conn *ib_conn)
+void iser_free_fmr_pool(struct ib_conn *ib_conn)
{
iser_info("freeing conn %p fmr pool %p\n",
ib_conn, ib_conn->fmr.pool);
@@ -367,10 +350,10 @@ fast_reg_mr_failure:
* for fast registration work requests.
* returns 0 on success, or errno code on failure
*/
-int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
- struct iser_device *device = ib_conn->device;
- struct fast_reg_descriptor *desc;
+ struct iser_device *device = ib_conn->device;
+ struct fast_reg_descriptor *desc;
int i, ret;
INIT_LIST_HEAD(&ib_conn->fastreg.pool);
@@ -406,7 +389,7 @@ err:
/**
* iser_free_fastreg_pool - releases the pool of fast_reg descriptors
*/
-void iser_free_fastreg_pool(struct iser_conn *ib_conn)
+void iser_free_fastreg_pool(struct ib_conn *ib_conn)
{
struct fast_reg_descriptor *desc, *tmp;
int i = 0;
@@ -440,7 +423,7 @@ void iser_free_fastreg_pool(struct iser_conn *ib_conn)
*
* returns 0 on success, -1 on failure
*/
-static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
struct iser_device *device;
struct ib_qp_init_attr init_attr;
@@ -455,28 +438,30 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
mutex_lock(&ig.connlist_mutex);
/* select the CQ with the minimal number of usages */
- for (index = 0; index < device->cqs_used; index++)
- if (device->cq_active_qps[index] <
- device->cq_active_qps[min_index])
+ for (index = 0; index < device->comps_used; index++) {
+ if (device->comps[index].active_qps <
+ device->comps[min_index].active_qps)
min_index = index;
- device->cq_active_qps[min_index]++;
+ }
+ ib_conn->comp = &device->comps[min_index];
+ ib_conn->comp->active_qps++;
mutex_unlock(&ig.connlist_mutex);
iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);
init_attr.event_handler = iser_qp_event_callback;
init_attr.qp_context = (void *)ib_conn;
- init_attr.send_cq = device->tx_cq[min_index];
- init_attr.recv_cq = device->rx_cq[min_index];
+ init_attr.send_cq = ib_conn->comp->cq;
+ init_attr.recv_cq = ib_conn->comp->cq;
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
init_attr.cap.max_send_sge = 2;
init_attr.cap.max_recv_sge = 1;
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr.qp_type = IB_QPT_RC;
if (ib_conn->pi_support) {
- init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS;
+ init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
} else {
- init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
+ init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
}
ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
@@ -495,30 +480,6 @@ out_err:
}
/**
- * releases the QP object
- */
-static void iser_free_ib_conn_res(struct iser_conn *ib_conn)
-{
- int cq_index;
- BUG_ON(ib_conn == NULL);
-
- iser_info("freeing conn %p cma_id %p qp %p\n",
- ib_conn, ib_conn->cma_id,
- ib_conn->qp);
-
- /* qp is created only once both addr & route are resolved */
-
- if (ib_conn->qp != NULL) {
- cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
- ib_conn->device->cq_active_qps[cq_index]--;
-
- rdma_destroy_qp(ib_conn->cma_id);
- }
-
- ib_conn->qp = NULL;
-}
-
-/**
* based on the resolved device node GUID see if there already allocated
* device for this device. If there's no such, create one.
*/
@@ -572,88 +533,142 @@ static void iser_device_try_release(struct iser_device *device)
/**
* Called with state mutex held
**/
-static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
- enum iser_ib_conn_state comp,
- enum iser_ib_conn_state exch)
+static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
+ enum iser_conn_state comp,
+ enum iser_conn_state exch)
{
int ret;
- if ((ret = (ib_conn->state == comp)))
- ib_conn->state = exch;
+ ret = (iser_conn->state == comp);
+ if (ret)
+ iser_conn->state = exch;
+
return ret;
}
void iser_release_work(struct work_struct *work)
{
- struct iser_conn *ib_conn;
- int rc;
+ struct iser_conn *iser_conn;
- ib_conn = container_of(work, struct iser_conn, release_work);
+ iser_conn = container_of(work, struct iser_conn, release_work);
- /* wait for .conn_stop callback */
- rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ);
- WARN_ON(rc == 0);
+ /* Wait for conn_stop to complete */
+ wait_for_completion(&iser_conn->stop_completion);
+ /* Wait for IB resouces cleanup to complete */
+ wait_for_completion(&iser_conn->ib_completion);
- /* wait for the qp`s post send and post receive buffers to empty */
- rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ);
- WARN_ON(rc == 0);
+ mutex_lock(&iser_conn->state_mutex);
+ iser_conn->state = ISER_CONN_DOWN;
+ mutex_unlock(&iser_conn->state_mutex);
- ib_conn->state = ISER_CONN_DOWN;
+ iser_conn_release(iser_conn);
+}
+
+/**
+ * iser_free_ib_conn_res - release IB related resources
+ * @iser_conn: iser connection struct
+ * @destroy_device: indicator if we need to try to release
+ * the iser device (only iscsi shutdown and DEVICE_REMOVAL
+ * will use this.
+ *
+ * This routine is called with the iser state mutex held
+ * so the cm_id removal is out of here. It is Safe to
+ * be invoked multiple times.
+ */
+static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
+ bool destroy_device)
+{
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
- mutex_lock(&ib_conn->state_mutex);
- ib_conn->state = ISER_CONN_DOWN;
- mutex_unlock(&ib_conn->state_mutex);
+ iser_info("freeing conn %p cma_id %p qp %p\n",
+ iser_conn, ib_conn->cma_id, ib_conn->qp);
+
+ iser_free_rx_descriptors(iser_conn);
- iser_conn_release(ib_conn);
+ if (ib_conn->qp != NULL) {
+ ib_conn->comp->active_qps--;
+ rdma_destroy_qp(ib_conn->cma_id);
+ ib_conn->qp = NULL;
+ }
+
+ if (destroy_device && device != NULL) {
+ iser_device_try_release(device);
+ ib_conn->device = NULL;
+ }
}
/**
* Frees all conn objects and deallocs conn descriptor
*/
-void iser_conn_release(struct iser_conn *ib_conn)
+void iser_conn_release(struct iser_conn *iser_conn)
{
- struct iser_device *device = ib_conn->device;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
mutex_lock(&ig.connlist_mutex);
- list_del(&ib_conn->conn_list);
+ list_del(&iser_conn->conn_list);
mutex_unlock(&ig.connlist_mutex);
- mutex_lock(&ib_conn->state_mutex);
- BUG_ON(ib_conn->state != ISER_CONN_DOWN);
-
- iser_free_rx_descriptors(ib_conn);
- iser_free_ib_conn_res(ib_conn);
- ib_conn->device = NULL;
- /* on EVENT_ADDR_ERROR there's no device yet for this conn */
- if (device != NULL)
- iser_device_try_release(device);
- mutex_unlock(&ib_conn->state_mutex);
+ mutex_lock(&iser_conn->state_mutex);
+ if (iser_conn->state != ISER_CONN_DOWN)
+ iser_warn("iser conn %p state %d, expected state down.\n",
+ iser_conn, iser_conn->state);
+ /*
+ * In case we never got to bind stage, we still need to
+ * release IB resources (which is safe to call more than once).
+ */
+ iser_free_ib_conn_res(iser_conn, true);
+ mutex_unlock(&iser_conn->state_mutex);
- /* if cma handler context, the caller actually destroy the id */
if (ib_conn->cma_id != NULL) {
rdma_destroy_id(ib_conn->cma_id);
ib_conn->cma_id = NULL;
}
- kfree(ib_conn);
+
+ kfree(iser_conn);
}
/**
* triggers start of the disconnect procedures and wait for them to be done
+ * Called with state mutex held
*/
-void iser_conn_terminate(struct iser_conn *ib_conn)
+int iser_conn_terminate(struct iser_conn *iser_conn)
{
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+ struct ib_send_wr *bad_wr;
int err = 0;
- /* change the ib conn state only if the conn is UP, however always call
- * rdma_disconnect since this is the only way to cause the CMA to change
- * the QP state to ERROR
+ /* terminate the iser conn only if the conn state is UP */
+ if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
+ ISER_CONN_TERMINATING))
+ return 0;
+
+ iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);
+
+ /* suspend queuing of new iscsi commands */
+ if (iser_conn->iscsi_conn)
+ iscsi_suspend_queue(iser_conn->iscsi_conn);
+
+ /*
+ * In case we didn't already clean up the cma_id (peer initiated
+ * a disconnection), we need to Cause the CMA to change the QP
+ * state to ERROR.
*/
+ if (ib_conn->cma_id) {
+ err = rdma_disconnect(ib_conn->cma_id);
+ if (err)
+ iser_err("Failed to disconnect, conn: 0x%p err %d\n",
+ iser_conn, err);
+
+ /* post an indication that all flush errors were consumed */
+ err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+ if (err)
+ iser_err("conn %p failed to post beacon", ib_conn);
+
+ wait_for_completion(&ib_conn->flush_comp);
+ }
- iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
- err = rdma_disconnect(ib_conn->cma_id);
- if (err)
- iser_err("Failed to disconnect, conn: 0x%p err %d\n",
- ib_conn,err);
+ return 1;
}
/**
@@ -661,10 +676,10 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
**/
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
- ib_conn = (struct iser_conn *)cma_id->context;
- ib_conn->state = ISER_CONN_DOWN;
+ iser_conn = (struct iser_conn *)cma_id->context;
+ iser_conn->state = ISER_CONN_DOWN;
}
/**
@@ -673,14 +688,16 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
struct iser_device *device;
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
+ struct ib_conn *ib_conn;
int ret;
- ib_conn = (struct iser_conn *)cma_id->context;
- if (ib_conn->state != ISER_CONN_PENDING)
+ iser_conn = (struct iser_conn *)cma_id->context;
+ if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
+ ib_conn = &iser_conn->ib_conn;
device = iser_device_find_by_ib_device(cma_id);
if (!device) {
iser_err("device lookup/creation failed\n");
@@ -719,14 +736,15 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
struct rdma_conn_param conn_param;
int ret;
struct iser_cm_hdr req_hdr;
- struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context;
+ struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
- if (ib_conn->state != ISER_CONN_PENDING)
+ if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
- ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
+ ret = iser_create_ib_conn_res(ib_conn);
if (ret)
goto failure;
@@ -755,57 +773,60 @@ failure:
static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
struct ib_qp_attr attr;
struct ib_qp_init_attr init_attr;
- ib_conn = (struct iser_conn *)cma_id->context;
- if (ib_conn->state != ISER_CONN_PENDING)
+ iser_conn = (struct iser_conn *)cma_id->context;
+ if (iser_conn->state != ISER_CONN_PENDING)
/* bailout */
return;
(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
- ib_conn->state = ISER_CONN_UP;
- complete(&ib_conn->up_completion);
+ iser_conn->state = ISER_CONN_UP;
+ complete(&iser_conn->up_completion);
}
static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
- struct iser_conn *ib_conn;
-
- ib_conn = (struct iser_conn *)cma_id->context;
+ struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
- /* getting here when the state is UP means that the conn is being *
- * terminated asynchronously from the iSCSI layer's perspective. */
- if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
- ISER_CONN_TERMINATING)){
- if (ib_conn->iscsi_conn)
- iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED);
+ if (iser_conn_terminate(iser_conn)) {
+ if (iser_conn->iscsi_conn)
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
else
iser_err("iscsi_iser connection isn't bound\n");
}
+}
+
+static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
+ bool destroy_device)
+{
+ struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
- /* Complete the termination process if no posts are pending. This code
- * block also exists in iser_handle_comp_error(), but it is needed here
- * for cases of no flushes at all, e.g. discovery over rdma.
+ /*
+ * We are not guaranteed that we visited disconnected_handler
+ * by now, call it here to be safe that we handle CM drep
+ * and flush errors.
*/
- if (ib_conn->post_recv_buf_count == 0 &&
- (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
- complete(&ib_conn->flush_completion);
- }
-}
+ iser_disconnected_handler(cma_id);
+ iser_free_ib_conn_res(iser_conn, destroy_device);
+ complete(&iser_conn->ib_completion);
+};
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
- struct iser_conn *ib_conn;
+ struct iser_conn *iser_conn;
+ int ret = 0;
- ib_conn = (struct iser_conn *)cma_id->context;
+ iser_conn = (struct iser_conn *)cma_id->context;
iser_info("event %d status %d conn %p id %p\n",
event->event, event->status, cma_id->context, cma_id);
- mutex_lock(&ib_conn->state_mutex);
+ mutex_lock(&iser_conn->state_mutex);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
iser_addr_handler(cma_id);
@@ -824,57 +845,73 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
iser_connect_error(cma_id);
break;
case RDMA_CM_EVENT_DISCONNECTED:
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_ADDR_CHANGE:
- case RDMA_CM_EVENT_TIMEWAIT_EXIT:
iser_disconnected_handler(cma_id);
break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ /*
+ * we *must* destroy the device as we cannot rely
+ * on iscsid to be around to initiate error handling.
+ * also implicitly destroy the cma_id.
+ */
+ iser_cleanup_handler(cma_id, true);
+ iser_conn->ib_conn.cma_id = NULL;
+ ret = 1;
+ break;
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ iser_cleanup_handler(cma_id, false);
+ break;
default:
iser_err("Unexpected RDMA CM event (%d)\n", event->event);
break;
}
- mutex_unlock(&ib_conn->state_mutex);
- return 0;
+ mutex_unlock(&iser_conn->state_mutex);
+
+ return ret;
}
-void iser_conn_init(struct iser_conn *ib_conn)
+void iser_conn_init(struct iser_conn *iser_conn)
{
- ib_conn->state = ISER_CONN_INIT;
- ib_conn->post_recv_buf_count = 0;
- atomic_set(&ib_conn->post_send_buf_count, 0);
- init_completion(&ib_conn->stop_completion);
- init_completion(&ib_conn->flush_completion);
- init_completion(&ib_conn->up_completion);
- INIT_LIST_HEAD(&ib_conn->conn_list);
- spin_lock_init(&ib_conn->lock);
- mutex_init(&ib_conn->state_mutex);
+ iser_conn->state = ISER_CONN_INIT;
+ iser_conn->ib_conn.post_recv_buf_count = 0;
+ init_completion(&iser_conn->ib_conn.flush_comp);
+ init_completion(&iser_conn->stop_completion);
+ init_completion(&iser_conn->ib_completion);
+ init_completion(&iser_conn->up_completion);
+ INIT_LIST_HEAD(&iser_conn->conn_list);
+ spin_lock_init(&iser_conn->ib_conn.lock);
+ mutex_init(&iser_conn->state_mutex);
}
/**
* starts the process of connecting to the target
* sleeps until the connection is established or rejected
*/
-int iser_connect(struct iser_conn *ib_conn,
+int iser_connect(struct iser_conn *iser_conn,
struct sockaddr *src_addr,
struct sockaddr *dst_addr,
int non_blocking)
{
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
int err = 0;
- mutex_lock(&ib_conn->state_mutex);
+ mutex_lock(&iser_conn->state_mutex);
- sprintf(ib_conn->name, "%pISp", dst_addr);
+ sprintf(iser_conn->name, "%pISp", dst_addr);
- iser_info("connecting to: %s\n", ib_conn->name);
+ iser_info("connecting to: %s\n", iser_conn->name);
/* the device is known only --after-- address resolution */
ib_conn->device = NULL;
- ib_conn->state = ISER_CONN_PENDING;
+ iser_conn->state = ISER_CONN_PENDING;
+
+ ib_conn->beacon.wr_id = ISER_BEACON_WRID;
+ ib_conn->beacon.opcode = IB_WR_SEND;
ib_conn->cma_id = rdma_create_id(iser_cma_handler,
- (void *)ib_conn,
- RDMA_PS_TCP, IB_QPT_RC);
+ (void *)iser_conn,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ib_conn->cma_id)) {
err = PTR_ERR(ib_conn->cma_id);
iser_err("rdma_create_id failed: %d\n", err);
@@ -888,27 +925,27 @@ int iser_connect(struct iser_conn *ib_conn,
}
if (!non_blocking) {
- wait_for_completion_interruptible(&ib_conn->up_completion);
+ wait_for_completion_interruptible(&iser_conn->up_completion);
- if (ib_conn->state != ISER_CONN_UP) {
+ if (iser_conn->state != ISER_CONN_UP) {
err = -EIO;
goto connect_failure;
}
}
- mutex_unlock(&ib_conn->state_mutex);
+ mutex_unlock(&iser_conn->state_mutex);
mutex_lock(&ig.connlist_mutex);
- list_add(&ib_conn->conn_list, &ig.connlist);
+ list_add(&iser_conn->conn_list, &ig.connlist);
mutex_unlock(&ig.connlist_mutex);
return 0;
id_failure:
ib_conn->cma_id = NULL;
addr_failure:
- ib_conn->state = ISER_CONN_DOWN;
+ iser_conn->state = ISER_CONN_DOWN;
connect_failure:
- mutex_unlock(&ib_conn->state_mutex);
- iser_conn_release(ib_conn);
+ mutex_unlock(&iser_conn->state_mutex);
+ iser_conn_release(iser_conn);
return err;
}
@@ -917,7 +954,7 @@ connect_failure:
*
* returns: 0 on success, errno code on failure
*/
-int iser_reg_page_vec(struct iser_conn *ib_conn,
+int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg)
{
@@ -987,7 +1024,8 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
- struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct iser_conn *iser_conn = iser_task->iser_conn;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct fast_reg_descriptor *desc = reg->mem_h;
if (!reg->is_mr)
@@ -1000,17 +1038,18 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
spin_unlock_bh(&ib_conn->lock);
}
-int iser_post_recvl(struct iser_conn *ib_conn)
+int iser_post_recvl(struct iser_conn *iser_conn)
{
struct ib_recv_wr rx_wr, *rx_wr_failed;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct ib_sge sge;
int ib_ret;
- sge.addr = ib_conn->login_resp_dma;
+ sge.addr = iser_conn->login_resp_dma;
sge.length = ISER_RX_LOGIN_SIZE;
sge.lkey = ib_conn->device->mr->lkey;
- rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf;
+ rx_wr.wr_id = (unsigned long)iser_conn->login_resp_buf;
rx_wr.sg_list = &sge;
rx_wr.num_sge = 1;
rx_wr.next = NULL;
@@ -1024,20 +1063,21 @@ int iser_post_recvl(struct iser_conn *ib_conn)
return ib_ret;
}
-int iser_post_recvm(struct iser_conn *ib_conn, int count)
+int iser_post_recvm(struct iser_conn *iser_conn, int count)
{
struct ib_recv_wr *rx_wr, *rx_wr_failed;
int i, ib_ret;
- unsigned int my_rx_head = ib_conn->rx_desc_head;
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+ unsigned int my_rx_head = iser_conn->rx_desc_head;
struct iser_rx_desc *rx_desc;
for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
- rx_desc = &ib_conn->rx_descs[my_rx_head];
+ rx_desc = &iser_conn->rx_descs[my_rx_head];
rx_wr->wr_id = (unsigned long)rx_desc;
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
- my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
+ my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
}
rx_wr--;
@@ -1049,7 +1089,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count -= count;
} else
- ib_conn->rx_desc_head = my_rx_head;
+ iser_conn->rx_desc_head = my_rx_head;
return ib_ret;
}
@@ -1059,139 +1099,166 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
*
* returns 0 on success, -1 on failure
*/
-int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
+int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
+ bool signal)
{
int ib_ret;
struct ib_send_wr send_wr, *send_wr_failed;
ib_dma_sync_single_for_device(ib_conn->device->ib_device,
- tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
+ tx_desc->dma_addr, ISER_HEADERS_LEN,
+ DMA_TO_DEVICE);
send_wr.next = NULL;
send_wr.wr_id = (unsigned long)tx_desc;
send_wr.sg_list = tx_desc->tx_sg;
send_wr.num_sge = tx_desc->num_sge;
send_wr.opcode = IB_WR_SEND;
- send_wr.send_flags = IB_SEND_SIGNALED;
-
- atomic_inc(&ib_conn->post_send_buf_count);
+ send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
- if (ib_ret) {
+ if (ib_ret)
iser_err("ib_post_send failed, ret:%d\n", ib_ret);
- atomic_dec(&ib_conn->post_send_buf_count);
- }
+
return ib_ret;
}
-static void iser_handle_comp_error(struct iser_tx_desc *desc,
- struct iser_conn *ib_conn)
+/**
+ * is_iser_tx_desc - Indicate if the completion wr_id
+ * is a TX descriptor or not.
+ * @iser_conn: iser connection
+ * @wr_id: completion WR identifier
+ *
+ * Since we cannot rely on wc opcode in FLUSH errors
+ * we must work around it by checking if the wr_id address
+ * falls in the iser connection rx_descs buffer. If so
+ * it is an RX descriptor, otherwize it is a TX.
+ */
+static inline bool
+is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
+{
+ void *start = iser_conn->rx_descs;
+ int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
+
+ if (wr_id >= start && wr_id < start + len)
+ return false;
+
+ return true;
+}
+
+/**
+ * iser_handle_comp_error() - Handle error completion
+ * @ib_conn: connection RDMA resources
+ * @wc: work completion
+ *
+ * Notes: We may handle a FLUSH error completion and in this case
+ * we only cleanup in case TX type was DATAOUT. For non-FLUSH
+ * error completion we should also notify iscsi layer that
+ * connection is failed (in case we passed bind stage).
+ */
+static void
+iser_handle_comp_error(struct ib_conn *ib_conn,
+ struct ib_wc *wc)
{
- if (desc && desc->type == ISCSI_TX_DATAOUT)
- kmem_cache_free(ig.desc_cache, desc);
-
- if (ib_conn->post_recv_buf_count == 0 &&
- atomic_read(&ib_conn->post_send_buf_count) == 0) {
- /**
- * getting here when the state is UP means that the conn is
- * being terminated asynchronously from the iSCSI layer's
- * perspective. It is safe to peek at the connection state
- * since iscsi_conn_failure is allowed to be called twice.
- **/
- if (ib_conn->state == ISER_CONN_UP)
- iscsi_conn_failure(ib_conn->iscsi_conn,
+ struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+ ib_conn);
+
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ if (iser_conn->iscsi_conn)
+ iscsi_conn_failure(iser_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED);
- /* no more non completed posts to the QP, complete the
- * termination process w.o worrying on disconnect event */
- complete(&ib_conn->flush_completion);
+ if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
+ struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
+
+ if (desc->type == ISCSI_TX_DATAOUT)
+ kmem_cache_free(ig.desc_cache, desc);
+ } else {
+ ib_conn->post_recv_buf_count--;
}
}
-static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
+/**
+ * iser_handle_wc - handle a single work completion
+ * @wc: work completion
+ *
+ * Soft-IRQ context, work completion can be either
+ * SEND or RECV, and can turn out successful or
+ * with error (or flush error).
+ */
+static void iser_handle_wc(struct ib_wc *wc)
{
- struct ib_cq *cq = device->tx_cq[cq_index];
- struct ib_wc wc;
+ struct ib_conn *ib_conn;
struct iser_tx_desc *tx_desc;
- struct iser_conn *ib_conn;
- int completed_tx = 0;
-
- while (ib_poll_cq(cq, 1, &wc) == 1) {
- tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
- ib_conn = wc.qp->qp_context;
- if (wc.status == IB_WC_SUCCESS) {
- if (wc.opcode == IB_WC_SEND)
- iser_snd_completion(tx_desc, ib_conn);
- else
- iser_err("expected opcode %d got %d\n",
- IB_WC_SEND, wc.opcode);
+ struct iser_rx_desc *rx_desc;
+
+ ib_conn = wc->qp->qp_context;
+ if (wc->status == IB_WC_SUCCESS) {
+ if (wc->opcode == IB_WC_RECV) {
+ rx_desc = (struct iser_rx_desc *)wc->wr_id;
+ iser_rcv_completion(rx_desc, wc->byte_len,
+ ib_conn);
+ } else
+ if (wc->opcode == IB_WC_SEND) {
+ tx_desc = (struct iser_tx_desc *)wc->wr_id;
+ iser_snd_completion(tx_desc, ib_conn);
} else {
- iser_err("tx id %llx status %d vend_err %x\n",
- wc.wr_id, wc.status, wc.vendor_err);
- if (wc.wr_id != ISER_FASTREG_LI_WRID) {
- atomic_dec(&ib_conn->post_send_buf_count);
- iser_handle_comp_error(tx_desc, ib_conn);
- }
+ iser_err("Unknown wc opcode %d\n", wc->opcode);
}
- completed_tx++;
+ } else {
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ iser_err("wr id %llx status %d vend_err %x\n",
+ wc->wr_id, wc->status, wc->vendor_err);
+ else
+ iser_dbg("flush error: wr id %llx\n", wc->wr_id);
+
+ if (wc->wr_id != ISER_FASTREG_LI_WRID &&
+ wc->wr_id != ISER_BEACON_WRID)
+ iser_handle_comp_error(ib_conn, wc);
+
+ /* complete in case all flush errors were consumed */
+ if (wc->wr_id == ISER_BEACON_WRID)
+ complete(&ib_conn->flush_comp);
}
- return completed_tx;
}
-
+/**
+ * iser_cq_tasklet_fn - iSER completion polling loop
+ * @data: iSER completion context
+ *
+ * Soft-IRQ context, polling connection CQ until
+ * either CQ was empty or we exausted polling budget
+ */
static void iser_cq_tasklet_fn(unsigned long data)
{
- struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
- struct iser_device *device = cq_desc->device;
- int cq_index = cq_desc->cq_index;
- struct ib_cq *cq = device->rx_cq[cq_index];
- struct ib_wc wc;
- struct iser_rx_desc *desc;
- unsigned long xfer_len;
- struct iser_conn *ib_conn;
- int completed_tx, completed_rx = 0;
-
- /* First do tx drain, so in a case where we have rx flushes and a successful
- * tx completion we will still go through completion error handling.
- */
- completed_tx = iser_drain_tx_cq(device, cq_index);
-
- while (ib_poll_cq(cq, 1, &wc) == 1) {
- desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
- BUG_ON(desc == NULL);
- ib_conn = wc.qp->qp_context;
- if (wc.status == IB_WC_SUCCESS) {
- if (wc.opcode == IB_WC_RECV) {
- xfer_len = (unsigned long)wc.byte_len;
- iser_rcv_completion(desc, xfer_len, ib_conn);
- } else
- iser_err("expected opcode %d got %d\n",
- IB_WC_RECV, wc.opcode);
- } else {
- if (wc.status != IB_WC_WR_FLUSH_ERR)
- iser_err("rx id %llx status %d vend_err %x\n",
- wc.wr_id, wc.status, wc.vendor_err);
- ib_conn->post_recv_buf_count--;
- iser_handle_comp_error(NULL, ib_conn);
- }
- completed_rx++;
- if (!(completed_rx & 63))
- completed_tx += iser_drain_tx_cq(device, cq_index);
+ struct iser_comp *comp = (struct iser_comp *)data;
+ struct ib_cq *cq = comp->cq;
+ struct ib_wc *const wcs = comp->wcs;
+ int i, n, completed = 0;
+
+ while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
+ for (i = 0; i < n; i++)
+ iser_handle_wc(&wcs[i]);
+
+ completed += n;
+ if (completed >= iser_cq_poll_limit)
+ break;
}
- /* #warning "it is assumed here that arming CQ only once its empty" *
- * " would not cause interrupts to be missed" */
+
+ /*
+ * It is assumed here that arming CQ only once its empty
+ * would not cause interrupts to be missed.
+ */
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
+ iser_dbg("got %d completions\n", completed);
}
static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
- struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
- struct iser_device *device = cq_desc->device;
- int cq_index = cq_desc->cq_index;
+ struct iser_comp *comp = cq_context;
- tasklet_schedule(&device->cq_tasklet[cq_index]);
+ tasklet_schedule(&comp->tasklet);
}
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index da8ff124762a..0bea5776bcbc 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -2609,58 +2609,45 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
return ret;
}
-static inline enum ib_t10_dif_type
-se2ib_prot_type(enum target_prot_type prot_type)
-{
- switch (prot_type) {
- case TARGET_DIF_TYPE0_PROT:
- return IB_T10DIF_NONE;
- case TARGET_DIF_TYPE1_PROT:
- return IB_T10DIF_TYPE1;
- case TARGET_DIF_TYPE2_PROT:
- return IB_T10DIF_TYPE2;
- case TARGET_DIF_TYPE3_PROT:
- return IB_T10DIF_TYPE3;
- default:
- return IB_T10DIF_NONE;
- }
-}
+static inline void
+isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
+ struct ib_sig_domain *domain)
+{
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.bg_type = IB_T10DIF_CRC;
+ domain->sig.dif.pi_interval = se_cmd->se_dev->dev_attrib.block_size;
+ domain->sig.dif.ref_tag = se_cmd->reftag_seed;
+ /*
+ * At the moment we hard code those, but if in the future
+ * the target core would like to use it, we will take it
+ * from se_cmd.
+ */
+ domain->sig.dif.apptag_check_mask = 0xffff;
+ domain->sig.dif.app_escape = true;
+ domain->sig.dif.ref_escape = true;
+ if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT ||
+ se_cmd->prot_type == TARGET_DIF_TYPE2_PROT)
+ domain->sig.dif.ref_remap = true;
+};
static int
isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
{
- enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type);
-
- sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
- sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
- sig_attrs->mem.sig.dif.pi_interval =
- se_cmd->se_dev->dev_attrib.block_size;
- sig_attrs->wire.sig.dif.pi_interval =
- se_cmd->se_dev->dev_attrib.block_size;
-
switch (se_cmd->prot_op) {
case TARGET_PROT_DIN_INSERT:
case TARGET_PROT_DOUT_STRIP:
- sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
- sig_attrs->wire.sig.dif.type = ib_prot_type;
- sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed;
+ sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
+ isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
break;
case TARGET_PROT_DOUT_INSERT:
case TARGET_PROT_DIN_STRIP:
- sig_attrs->mem.sig.dif.type = ib_prot_type;
- sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed;
- sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+ isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
break;
case TARGET_PROT_DIN_PASS:
case TARGET_PROT_DOUT_PASS:
- sig_attrs->mem.sig.dif.type = ib_prot_type;
- sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed;
- sig_attrs->wire.sig.dif.type = ib_prot_type;
- sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
- sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed;
+ isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
+ isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
break;
default:
pr_err("Unsupported PI operation %d\n", se_cmd->prot_op);