From f5b404317b79823ec643dfbb71d62f65a48cc178 Mon Sep 17 00:00:00 2001 From: Florin Malita Date: Thu, 19 Jul 2007 15:58:09 -0400 Subject: IB/mlx4: Fix leaks in __mlx4_ib_modify_qp Temporarily allocated struct mlx4_qp_context *context is leaked by several error paths. The patch takes advantage of the return value 'err' being preinitialized to -EINVAL. Spotted by Coverity (CID 1768). Signed-off-by: Florin Malita Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index b5a24fbef70d..5456bc4aff7c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -742,7 +742,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { printk(KERN_ERR "path MTU (%u) is invalid\n", attr->path_mtu); - return -EINVAL; + goto out; } context->mtu_msgmax = (attr->path_mtu << 5) | 31; } @@ -781,10 +781,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_AV) { if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, - attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) { - err = -EINVAL; + attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) goto out; - } optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | MLX4_QP_OPTPAR_SCHED_QUEUE); @@ -798,15 +796,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_ALT_PATH) { if (attr->alt_port_num == 0 || attr->alt_port_num > dev->dev->caps.num_ports) - return -EINVAL; + goto out; if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len[attr->alt_port_num]) - return -EINVAL; + goto out; if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path, attr->alt_port_num)) - return -EINVAL; + goto out; context->alt_path.pkey_index = attr->alt_pkey_index; context->alt_path.ackto = attr->alt_timeout << 3; -- cgit v1.2.3 From bd631048116df40837667a72c578b170c906dd30 Mon Sep 17 00:00:00 2001 From: Arthur Jones Date: Thu, 19 Jul 2007 11:32:49 -0700 Subject: IB/ipath: Remove ipath_layer dead code The ipath_layer.[ch] code was an attempt to provide a single interface for the ipath verbs and ipath_ether code to use. As verbs functionality increased, the layer's functionality became insufficient and the verbs code broke away to interface directly to the driver. The failed attempt to get ipath_ether upstream was the final nail in the coffin and now it sits quietly in a dark kernel.org corner waiting for someone to notice the smell and send it along to its final resting place. Roland Dreier was that someone -- this patch expands on his work...
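The __mlx4_ib_modify_qp leak fix in the first patch above is the kernel's standard single-exit cleanup idiom: once the temporary context is allocated, every failure path jumps to one label that frees it, and because err is preinitialized to -EINVAL, a bare goto out still reports the right error code. A minimal compilable sketch of the pattern, using hypothetical names rather than the actual mlx4 code:

#include <errno.h>
#include <stdlib.h>

struct ctx { int dummy; };		/* stand-in for mlx4_qp_context */

static int modify_thing(int mtu_valid, int path_valid)
{
	int err = -EINVAL;		/* preinitialized, as in the patch */
	struct ctx *context = calloc(1, sizeof(*context));

	if (!context)
		return -ENOMEM;		/* nothing to free yet */

	if (!mtu_valid)
		goto out;		/* a plain 'return -EINVAL' here would leak */

	if (!path_valid)
		goto out;

	err = 0;			/* success path falls through */
out:
	free(context);			/* the one exit frees on every path */
	return err;
}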
Signed-off-by: Arthur Jones Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/Makefile | 1 - drivers/infiniband/hw/ipath/ipath_layer.c | 365 ------------------------------ drivers/infiniband/hw/ipath/ipath_layer.h | 71 ------ drivers/infiniband/hw/ipath/ipath_verbs.h | 2 - 4 files changed, 439 deletions(-) delete mode 100644 drivers/infiniband/hw/ipath/ipath_layer.c delete mode 100644 drivers/infiniband/hw/ipath/ipath_layer.h (limited to 'drivers') diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index ec2e603ea241..fe6738826865 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -14,7 +14,6 @@ ib_ipath-y := \ ipath_init_chip.o \ ipath_intr.o \ ipath_keys.o \ - ipath_layer.o \ ipath_mad.o \ ipath_mmap.o \ ipath_mr.o \ diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c deleted file mode 100644 index 82616b779e24..000000000000 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ /dev/null @@ -1,365 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * These are the routines used by layered drivers, currently just the - * layered ethernet driver and verbs layer. - */ - -#include -#include - -#include "ipath_kernel.h" -#include "ipath_layer.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - -/* Acquire before ipath_devs_lock. 
*/ -static DEFINE_MUTEX(ipath_layer_mutex); - -u16 ipath_layer_rcv_opcode; - -static int (*layer_intr)(void *, u32); -static int (*layer_rcv)(void *, void *, struct sk_buff *); -static int (*layer_rcv_lid)(void *, void *); - -static void *(*layer_add_one)(int, struct ipath_devdata *); -static void (*layer_remove_one)(void *); - -int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg) -{ - int ret = -ENODEV; - - if (dd->ipath_layer.l_arg && layer_intr) - ret = layer_intr(dd->ipath_layer.l_arg, arg); - - return ret; -} - -int ipath_layer_intr(struct ipath_devdata *dd, u32 arg) -{ - int ret; - - mutex_lock(&ipath_layer_mutex); - - ret = __ipath_layer_intr(dd, arg); - - mutex_unlock(&ipath_layer_mutex); - - return ret; -} - -int __ipath_layer_rcv(struct ipath_devdata *dd, void *hdr, - struct sk_buff *skb) -{ - int ret = -ENODEV; - - if (dd->ipath_layer.l_arg && layer_rcv) - ret = layer_rcv(dd->ipath_layer.l_arg, hdr, skb); - - return ret; -} - -int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr) -{ - int ret = -ENODEV; - - if (dd->ipath_layer.l_arg && layer_rcv_lid) - ret = layer_rcv_lid(dd->ipath_layer.l_arg, hdr); - - return ret; -} - -void ipath_layer_lid_changed(struct ipath_devdata *dd) -{ - mutex_lock(&ipath_layer_mutex); - - if (dd->ipath_layer.l_arg && layer_intr) - layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID); - - mutex_unlock(&ipath_layer_mutex); -} - -void ipath_layer_add(struct ipath_devdata *dd) -{ - mutex_lock(&ipath_layer_mutex); - - if (layer_add_one) - dd->ipath_layer.l_arg = - layer_add_one(dd->ipath_unit, dd); - - mutex_unlock(&ipath_layer_mutex); -} - -void ipath_layer_remove(struct ipath_devdata *dd) -{ - mutex_lock(&ipath_layer_mutex); - - if (dd->ipath_layer.l_arg && layer_remove_one) { - layer_remove_one(dd->ipath_layer.l_arg); - dd->ipath_layer.l_arg = NULL; - } - - mutex_unlock(&ipath_layer_mutex); -} - -int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), - void (*l_remove)(void *), - int (*l_intr)(void *, u32), - int (*l_rcv)(void *, void *, struct sk_buff *), - u16 l_rcv_opcode, - int (*l_rcv_lid)(void *, void *)) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - mutex_lock(&ipath_layer_mutex); - - layer_add_one = l_add; - layer_remove_one = l_remove; - layer_intr = l_intr; - layer_rcv = l_rcv; - layer_rcv_lid = l_rcv_lid; - ipath_layer_rcv_opcode = l_rcv_opcode; - - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - if (!(dd->ipath_flags & IPATH_INITTED)) - continue; - - if (dd->ipath_layer.l_arg) - continue; - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd); - spin_lock_irqsave(&ipath_devs_lock, flags); - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - mutex_unlock(&ipath_layer_mutex); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_register); - -void ipath_layer_unregister(void) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - mutex_lock(&ipath_layer_mutex); - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - if (dd->ipath_layer.l_arg && layer_remove_one) { - spin_unlock_irqrestore(&ipath_devs_lock, flags); - layer_remove_one(dd->ipath_layer.l_arg); - spin_lock_irqsave(&ipath_devs_lock, flags); - dd->ipath_layer.l_arg = NULL; - } - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - layer_add_one = NULL; - layer_remove_one = NULL; - layer_intr = NULL; - layer_rcv = NULL; - layer_rcv_lid = NULL; - 
- mutex_unlock(&ipath_layer_mutex); -} - -EXPORT_SYMBOL_GPL(ipath_layer_unregister); - -int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax) -{ - int ret; - u32 intval = 0; - - mutex_lock(&ipath_layer_mutex); - - if (!dd->ipath_layer.l_arg) { - ret = -EINVAL; - goto bail; - } - - ret = ipath_setrcvhdrsize(dd, IPATH_HEADER_QUEUE_WORDS); - - if (ret < 0) - goto bail; - - *pktmax = dd->ipath_ibmaxlen; - - if (*dd->ipath_statusp & IPATH_STATUS_IB_READY) - intval |= IPATH_LAYER_INT_IF_UP; - if (dd->ipath_lid) - intval |= IPATH_LAYER_INT_LID; - if (dd->ipath_mlid) - intval |= IPATH_LAYER_INT_BCAST; - /* - * do this on open, in case low level is already up and - * just layered driver was reloaded, etc. - */ - if (intval) - layer_intr(dd->ipath_layer.l_arg, intval); - - ret = 0; -bail: - mutex_unlock(&ipath_layer_mutex); - - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_open); - -u16 ipath_layer_get_lid(struct ipath_devdata *dd) -{ - return dd->ipath_lid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_lid); - -/** - * ipath_layer_get_mac - get the MAC address - * @dd: the infinipath device - * @mac: the MAC is put here - * - * This is the EUID-64 OUI octets (top 3), then - * skip the next 2 (which should both be zero or 0xff). - * The returned MAC is in network order - * mac points to at least 6 bytes of buffer - * We assume that by the time the LID is set, that the GUID is as valid - * as it's ever going to be, rather than adding yet another status bit. - */ - -int ipath_layer_get_mac(struct ipath_devdata *dd, u8 * mac) -{ - u8 *guid; - - guid = (u8 *) &dd->ipath_guid; - - mac[0] = guid[0]; - mac[1] = guid[1]; - mac[2] = guid[2]; - mac[3] = guid[5]; - mac[4] = guid[6]; - mac[5] = guid[7]; - if ((guid[3] || guid[4]) && !(guid[3] == 0xff && guid[4] == 0xff)) - ipath_dbg("Warning, guid bytes 3 and 4 not 0 or 0xffff: " - "%x %x\n", guid[3], guid[4]); - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_mac); - -u16 ipath_layer_get_bcast(struct ipath_devdata *dd) -{ - return dd->ipath_mlid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_bcast); - -int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr) -{ - int ret = 0; - u32 __iomem *piobuf; - u32 plen, *uhdr; - size_t count; - __be16 vlsllnh; - - if (!(dd->ipath_flags & IPATH_RCVHDRSZ_SET)) { - ipath_dbg("send while not open\n"); - ret = -EINVAL; - } else - if ((dd->ipath_flags & (IPATH_LINKUNK | IPATH_LINKDOWN)) || - dd->ipath_lid == 0) { - /* - * lid check is for when sma hasn't yet configured - */ - ret = -ENETDOWN; - ipath_cdbg(VERBOSE, "send while not ready, " - "mylid=%u, flags=0x%x\n", - dd->ipath_lid, dd->ipath_flags); - } - - vlsllnh = *((__be16 *) hdr); - if (vlsllnh != htons(IPATH_LRH_BTH)) { - ipath_dbg("Warning: lrh[0] wrong (%x, not %x); " - "not sending\n", be16_to_cpu(vlsllnh), - IPATH_LRH_BTH); - ret = -EINVAL; - } - if (ret) - goto done; - - /* Get a PIO buffer to use. 
*/ - piobuf = ipath_getpiobuf(dd, NULL); - if (piobuf == NULL) { - ret = -EBUSY; - goto done; - } - - plen = (sizeof(*hdr) >> 2); /* actual length */ - ipath_cdbg(EPKT, "0x%x+1w pio %p\n", plen, piobuf); - - writeq(plen+1, piobuf); /* len (+1 for pad) to pbc, no flags */ - ipath_flush_wc(); - piobuf += 2; - uhdr = (u32 *)hdr; - count = plen-1; /* amount we can copy before trigger word */ - __iowrite32_copy(piobuf, uhdr, count); - ipath_flush_wc(); - __raw_writel(uhdr[count], piobuf + count); - ipath_flush_wc(); /* ensure it's sent, now */ - - ipath_stats.sps_ether_spkts++; /* ether packet sent */ - -done: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_send_hdr); - -int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd) -{ - set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int); diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h deleted file mode 100644 index 415709c4d85b..000000000000 --- a/drivers/infiniband/hw/ipath/ipath_layer.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _IPATH_LAYER_H -#define _IPATH_LAYER_H - -/* - * This header file is for symbols shared between the infinipath driver - * and drivers layered upon it (such as ipath). 
- */ - -struct sk_buff; -struct ipath_devdata; -struct ether_header; - -int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), - void (*l_remove)(void *), - int (*l_intr)(void *, u32), - int (*l_rcv)(void *, void *, - struct sk_buff *), - u16 rcv_opcode, - int (*l_rcv_lid)(void *, void *)); -void ipath_layer_unregister(void); -int ipath_layer_open(struct ipath_devdata *, u32 * pktmax); -u16 ipath_layer_get_lid(struct ipath_devdata *dd); -int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *); -u16 ipath_layer_get_bcast(struct ipath_devdata *dd); -int ipath_layer_send_hdr(struct ipath_devdata *dd, - struct ether_header *hdr); -int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd); - -/* ipath_ether interrupt values */ -#define IPATH_LAYER_INT_IF_UP 0x2 -#define IPATH_LAYER_INT_IF_DOWN 0x4 -#define IPATH_LAYER_INT_LID 0x8 -#define IPATH_LAYER_INT_SEND_CONTINUE 0x10 -#define IPATH_LAYER_INT_BCAST 0x40 - -extern unsigned ipath_debug; /* debugging bit mask */ - -#endif /* _IPATH_LAYER_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 9bbe81967f14..1a24c6a4a814 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -42,8 +42,6 @@ #include #include -#include "ipath_layer.h" - #define IPATH_MAX_RDMA_ATOMIC 4 #define QPN_MAX (1 << 24) -- cgit v1.2.3 From c1f74958dbd19f6a837d887ed416688c063af529 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 19 Jul 2007 14:28:49 +0300 Subject: IB/mthca: Change command token on timeout The FW command token is currently only updated on a command completion event. This means that on command timeout, the same token will be reused for new command, which results in a mess if the timed out command *does* eventually complete. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index f40558d76475..acc95892713a 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -357,8 +357,6 @@ void mthca_cmd_event(struct mthca_dev *dev, context->status = status; context->out_param = out_param; - context->token += dev->cmd.token_mask + 1; - complete(&context->done); } @@ -380,6 +378,7 @@ static int mthca_cmd_wait(struct mthca_dev *dev, spin_lock(&dev->cmd.context_lock); BUG_ON(dev->cmd.free_head < 0); context = &dev->cmd.context[dev->cmd.free_head]; + context->token += dev->cmd.token_mask + 1; dev->cmd.free_head = context->next; spin_unlock(&dev->cmd.context_lock); -- cgit v1.2.3 From 0981582dbfae86ba0306406f1af329bb702752d2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 20 Jul 2007 21:19:43 -0700 Subject: mlx4_core: Change command token on timeout The FW command token is currently only updated on a command completion event. This means that on command timeout, the same token will be reused for new command, which results in a mess if the timed out command *does* eventually complete. This is the same change as the patch for mthca from Michael S. Tsirkin that was just merged. It seems sensible to avoid gratuitous differences in FW command processing between mthca and mlx4. 
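This mthca patch and the mlx4_core copy of it that follows exploit the token layout: the bits covered by token_mask index the command context, and everything above them acts as a generation counter, so adding token_mask + 1 changes the generation while leaving the index intact. Doing that when the context is taken from the free list, rather than when a completion arrives, means a completion for a timed-out command carries a token that no longer matches the context's current token and can be recognized as stale. A small hedged sketch of the arithmetic (made-up types, not the real mthca/mlx4 structures):

#include <stdint.h>

struct cmd_context {
	uint16_t token;		/* low bits: index; high bits: generation */
};

/* Called when a context is allocated for a new FW command. */
static uint16_t take_context(struct cmd_context *ctx, uint16_t token_mask)
{
	ctx->token += token_mask + 1;	/* bump generation, keep index bits */
	return ctx->token;
}

/* A completion event for the context's previous use still carries the
 * old generation, so comparing tokens exposes it as stale. */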
Signed-off-by: Roland Dreier --- drivers/net/mlx4/cmd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c index c1f81a993f5d..a9f31753661a 100644 --- a/drivers/net/mlx4/cmd.c +++ b/drivers/net/mlx4/cmd.c @@ -246,8 +246,6 @@ void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param) context->result = mlx4_status_to_errno(status); context->out_param = out_param; - context->token += priv->cmd.token_mask + 1; - complete(&context->done); } @@ -264,6 +262,7 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, spin_lock(&cmd->context_lock); BUG_ON(cmd->free_head < 0); context = &cmd->context[cmd->free_head]; + context->token += cmd->token_mask + 1; cmd->free_head = context->next; spin_unlock(&cmd->context_lock); -- cgit v1.2.3 From 23f1b38481596ad77e5f51562977b12c8418eee3 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 20 Jul 2007 21:19:43 -0700 Subject: IB/mlx4: Fix error path in create_qp_common() The error handling code at err_wrid in create_qp_common() does not handle a userspace QP attached to an SRQ correctly, since it ends up in the else clause of the if statement. This means it tries to kfree() the uninitialized qp->sq.wrid and qp->rq.wrid pointers. Fix this so we only free the wrid arrays for kernel QPs. Pointed out by Michael S. Tsirkin . Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5456bc4aff7c..f6315dfb213e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -415,9 +415,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, return 0; err_wrid: - if (pd->uobject && !init_attr->srq) - mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); - else { + if (pd->uobject) { + if (!init_attr->srq) + mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), + &qp->db); + } else { kfree(qp->sq.wrid); kfree(qp->rq.wrid); } -- cgit v1.2.3 From 5bb7d9290cd23a55906e4fe7a7fedecf29468c81 Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Fri, 20 Jul 2007 16:01:51 +0200 Subject: IB/ehca: Support large page MRs Add support for MR pages larger than 4K on eHCA2. This reduces firmware memory consumption. If enabled via the mr_largepage module parameter, the MR page size will be determined based on the MR length and the hardware capabilities -- if the MR is >= 16M, 16M pages are used, for example. 
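The sizing logic this patch introduces has two parts: choose the largest supported hardware page size appropriate for the MR length, then count how many such pages cover the region, where the count must include the offset of the start address within its first page (ehca's NUM_CHUNKS() is exactly this ceiling division). A simplified sketch of both steps, with assumed constants mirroring EHCA_MR_PGSIZE* and the 4K branch condensed relative to the real ladder in ehca_reg_user_mr():

#include <stdint.h>

#define PG_4K	0x1000ULL
#define PG_64K	0x10000ULL
#define PG_1M	0x100000ULL
#define PG_16M	0x1000000ULL

static uint64_t pick_hwpage_size(uint64_t length, int hca_has_16m)
{
	if (!hca_has_16m)
		return PG_4K;
	if (length <= PG_4K)
		return PG_4K;
	if (length <= PG_64K)
		return PG_64K;
	if (length <= PG_1M)
		return PG_1M;
	return PG_16M;			/* an MR >= 16M uses 16M pages */
}

/* Ceiling of (in-page offset + length) / page size, like NUM_CHUNKS(). */
static uint64_t num_hwpages(uint64_t virt, uint64_t length, uint64_t pgsize)
{
	return (virt % pgsize + length + pgsize - 1) / pgsize;
}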
Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 9 + drivers/infiniband/hw/ehca/ehca_main.c | 18 +- drivers/infiniband/hw/ehca/ehca_mrmw.c | 371 +++++++++++++++++++++++++----- drivers/infiniband/hw/ehca/ehca_mrmw.h | 2 +- drivers/infiniband/hw/ehca/hcp_if.c | 20 +- 5 files changed, 357 insertions(+), 63 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 043e4fb23fb0..63b8b9f7c4fc 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -100,6 +100,11 @@ struct ehca_sport { struct ehca_sma_attr saved_attr; }; +#define HCA_CAP_MR_PGSIZE_4K 1 +#define HCA_CAP_MR_PGSIZE_64K 2 +#define HCA_CAP_MR_PGSIZE_1M 4 +#define HCA_CAP_MR_PGSIZE_16M 8 + struct ehca_shca { struct ib_device ib_device; struct ibmebus_dev *ibmebus_dev; @@ -115,6 +120,8 @@ struct ehca_shca { struct h_galpas galpas; struct mutex modify_mutex; u64 hca_cap; + /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ + u32 hca_cap_mr_pgsize; int max_mtu; }; @@ -206,6 +213,7 @@ struct ehca_mr { enum ehca_mr_flag flags; u32 num_kpages; /* number of kernel pages */ u32 num_hwpages; /* number of hw pages to form MR */ + u64 hwpage_size; /* hw page size used for this MR */ int acl; /* ACL (stored here for usage in reregister) */ u64 *start; /* virtual start address (stored here for */ /* usage in reregister) */ @@ -240,6 +248,7 @@ struct ehca_mr_pginfo { enum ehca_mr_pgi_type type; u64 num_kpages; u64 kpage_cnt; + u64 hwpage_size; /* hw page size used for this MR */ u64 num_hwpages; /* number of hw pages */ u64 hwpage_cnt; /* counter for hw pages */ u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 04c324330b7c..ec0145192228 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -63,6 +63,7 @@ int ehca_port_act_time = 30; int ehca_poll_all_eqs = 1; int ehca_static_rate = -1; int ehca_scaling_code = 0; +int ehca_mr_largepage = 0; module_param_named(open_aqp1, ehca_open_aqp1, int, 0); module_param_named(debug_level, ehca_debug_level, int, 0); @@ -72,7 +73,8 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0); module_param_named(port_act_time, ehca_port_act_time, int, 0); module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); module_param_named(static_rate, ehca_static_rate, int, 0); -module_param_named(scaling_code, ehca_scaling_code, int, 0); +module_param_named(scaling_code, ehca_scaling_code, int, 0); +module_param_named(mr_largepage, ehca_mr_largepage, int, 0); MODULE_PARM_DESC(open_aqp1, "AQP1 on startup (0: no (default), 1: yes)"); @@ -95,6 +97,9 @@ MODULE_PARM_DESC(static_rate, "set permanent static rate (default: disabled)"); MODULE_PARM_DESC(scaling_code, "set scaling code (0: disabled/default, 1: enabled)"); +MODULE_PARM_DESC(mr_largepage, + "use large page for MR (0: use PAGE_SIZE (default), " + "1: use large page depending on MR size"); DEFINE_RWLOCK(ehca_qp_idr_lock); DEFINE_RWLOCK(ehca_cq_idr_lock); @@ -295,6 +300,8 @@ int ehca_sense_attributes(struct ehca_shca *shca) if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) ehca_gen_dbg(" %s", hca_cap_descr[i].descr); + shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported; + port = (struct hipz_query_port *)rblock; h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); if (h_ret != H_SUCCESS) { @@ 
-590,6 +597,14 @@ static ssize_t ehca_show_adapter_handle(struct device *dev, } static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); +static ssize_t ehca_show_mr_largepage(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", ehca_mr_largepage); +} +static DEVICE_ATTR(mr_largepage, S_IRUGO, ehca_show_mr_largepage, NULL); + static struct attribute *ehca_dev_attrs[] = { &dev_attr_adapter_handle.attr, &dev_attr_num_ports.attr, @@ -606,6 +621,7 @@ static struct attribute *ehca_dev_attrs[] = { &dev_attr_cur_mw.attr, &dev_attr_max_pd.attr, &dev_attr_max_ah.attr, + &dev_attr_mr_largepage.attr, NULL }; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 9f4c9d46e8ef..c1b868b79d67 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -5,6 +5,7 @@ * * Authors: Dietmar Decker * Christoph Raisch + * Hoang-Nam Nguyen * * Copyright (c) 2005 IBM Corporation * @@ -56,6 +57,37 @@ static struct kmem_cache *mr_cache; static struct kmem_cache *mw_cache; +enum ehca_mr_pgsize { + EHCA_MR_PGSIZE4K = 0x1000L, + EHCA_MR_PGSIZE64K = 0x10000L, + EHCA_MR_PGSIZE1M = 0x100000L, + EHCA_MR_PGSIZE16M = 0x1000000L +}; + +extern int ehca_mr_largepage; + +static u32 ehca_encode_hwpage_size(u32 pgsize) +{ + u32 idx = 0; + pgsize >>= 12; + /* + * map mr page size into hw code: + * 0, 1, 2, 3 for 4K, 64K, 1M, 64M + */ + while (!(pgsize & 1)) { + idx++; + pgsize >>= 4; + } + return idx; +} + +static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) +{ + if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M) + return EHCA_MR_PGSIZE16M; + return EHCA_MR_PGSIZE4K; +} + static struct ehca_mr *ehca_mr_new(void) { struct ehca_mr *me; @@ -207,19 +239,23 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, struct ehca_mr_pginfo pginfo; u32 num_kpages; u32 num_hwpages; + u64 hw_pgsize; num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, PAGE_SIZE); - num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) + - size, EHCA_PAGESIZE); + /* for kernel space we try most possible pgsize */ + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, + hw_pgsize); memset(&pginfo, 0, sizeof(pginfo)); pginfo.type = EHCA_MR_PGI_PHYS; pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; pginfo.num_hwpages = num_hwpages; pginfo.u.phy.num_phys_buf = num_phys_buf; pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + pginfo.next_hwpage = + ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize; ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, @@ -259,6 +295,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int ret; u32 num_kpages; u32 num_hwpages; + u64 hwpage_size; if (!pd) { ehca_gen_err("bad pd=%p", pd); @@ -309,16 +346,32 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, /* determine number of MR pages */ num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); - num_hwpages = NUM_CHUNKS((virt % EHCA_PAGESIZE) + length, - EHCA_PAGESIZE); + /* select proper hw_pgsize */ + if (ehca_mr_largepage && + (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) { + if (length <= EHCA_MR_PGSIZE4K + && PAGE_SIZE == EHCA_MR_PGSIZE4K) + hwpage_size = EHCA_MR_PGSIZE4K; + else if (length <= EHCA_MR_PGSIZE64K) + hwpage_size = EHCA_MR_PGSIZE64K; + else if (length 
<= EHCA_MR_PGSIZE1M) + hwpage_size = EHCA_MR_PGSIZE1M; + else + hwpage_size = EHCA_MR_PGSIZE16M; + } else + hwpage_size = EHCA_MR_PGSIZE4K; + ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size); +reg_user_mr_fallback: + num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); /* register MR on HCA */ memset(&pginfo, 0, sizeof(pginfo)); pginfo.type = EHCA_MR_PGI_USER; + pginfo.hwpage_size = hwpage_size; pginfo.num_kpages = num_kpages; pginfo.num_hwpages = num_hwpages; pginfo.u.usr.region = e_mr->umem; - pginfo.next_hwpage = e_mr->umem->offset / EHCA_PAGESIZE; + pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, (&e_mr->umem->chunk_list), list); @@ -326,6 +379,18 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); + if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { + ehca_warn(pd->device, "failed to register mr " + "with hwpage_size=%lx", hwpage_size); + ehca_info(pd->device, "try to register mr with " + "kpage_size=%lx", PAGE_SIZE); + /* + * this means kpages are not contiguous for a hw page + * try kernel page size as fallback solution + */ + hwpage_size = PAGE_SIZE; + goto reg_user_mr_fallback; + } if (ret) { ib_mr = ERR_PTR(ret); goto reg_user_mr_exit2; @@ -452,6 +517,8 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); if (mr_rereg_mask & IB_MR_REREG_TRANS) { + u64 hw_pgsize = ehca_get_max_hwpage_size(shca); + new_start = iova_start; /* change address */ /* check physical buffer list and calculate size */ ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, @@ -468,16 +535,17 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, } num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + new_size, PAGE_SIZE); - num_hwpages = NUM_CHUNKS(((u64)new_start % EHCA_PAGESIZE) + - new_size, EHCA_PAGESIZE); + num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + + new_size, hw_pgsize); memset(&pginfo, 0, sizeof(pginfo)); pginfo.type = EHCA_MR_PGI_PHYS; pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; pginfo.num_hwpages = num_hwpages; pginfo.u.phy.num_phys_buf = num_phys_buf; pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + pginfo.next_hwpage = + ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize; } if (mr_rereg_mask & IB_MR_REREG_ACCESS) new_acl = mr_access_flags; @@ -709,6 +777,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, int ret; u32 tmp_lkey, tmp_rkey; struct ehca_mr_pginfo pginfo; + u64 hw_pgsize; /* check other parameters */ if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && @@ -738,8 +807,8 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, ib_fmr = ERR_PTR(-EINVAL); goto alloc_fmr_exit0; } - if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) && - ((1 << fmr_attr->page_shift) != PAGE_SIZE)) { + hw_pgsize = ehca_get_max_hwpage_size(shca); + if ((1 << fmr_attr->page_shift) != hw_pgsize) { ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", fmr_attr->page_shift); ib_fmr = ERR_PTR(-EINVAL); @@ -755,6 +824,10 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, /* register MR on HCA */ memset(&pginfo, 0, sizeof(pginfo)); + /* + * pginfo.num_hwpages==0, ie register_rpages() will not be called + * but deferred to map_phys_fmr() + */ ret = ehca_reg_mr(shca, e_fmr, NULL, fmr_attr->max_pages * (1 << fmr_attr->page_shift), 
mr_access_flags, e_pd, &pginfo, @@ -765,6 +838,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, } /* successful */ + e_fmr->hwpage_size = hw_pgsize; e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; e_fmr->fmr_max_pages = fmr_attr->max_pages; e_fmr->fmr_max_maps = fmr_attr->max_maps; @@ -822,10 +896,12 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, memset(&pginfo, 0, sizeof(pginfo)); pginfo.type = EHCA_MR_PGI_FMR; pginfo.num_kpages = list_len; - pginfo.num_hwpages = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); + pginfo.hwpage_size = e_fmr->hwpage_size; + pginfo.num_hwpages = + list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; pginfo.u.fmr.page_list = page_list; - pginfo.next_hwpage = ((iova & (e_fmr->fmr_page_size-1)) / - EHCA_PAGESIZE); + pginfo.next_hwpage = + (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, @@ -964,7 +1040,7 @@ int ehca_reg_mr(struct ehca_shca *shca, struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); if (ehca_use_hp_mr == 1) hipz_acl |= 0x00000001; @@ -987,6 +1063,7 @@ int ehca_reg_mr(struct ehca_shca *shca, /* successful registration */ e_mr->num_kpages = pginfo->num_kpages; e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; e_mr->start = iova_start; e_mr->size = size; e_mr->acl = acl; @@ -1029,6 +1106,9 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, u32 i; u64 *kpage; + if (!pginfo->num_hwpages) /* in case of fmr */ + return 0; + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); @@ -1036,7 +1116,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, goto ehca_reg_mr_rpages_exit0; } - /* max 512 pages per shot */ + /* max MAX_RPAGES ehca mr pages per register call */ for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { @@ -1049,8 +1129,8 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, ret = ehca_set_pagebuf(pginfo, rnum, kpage); if (ret) { ehca_err(&shca->ib_device, "ehca_set_pagebuf " - "bad rc, ret=%x rnum=%x kpage=%p", - ret, rnum, kpage); + "bad rc, ret=%x rnum=%x kpage=%p", + ret, rnum, kpage); goto ehca_reg_mr_rpages_exit1; } @@ -1065,9 +1145,10 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, } else rpage = *kpage; - h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr, - 0, /* pagesize 4k */ - 0, rpage, rnum); + h_ret = hipz_h_register_rpage_mr( + shca->ipz_hca_handle, e_mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { /* @@ -1131,7 +1212,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { @@ -1182,6 +1263,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, */ e_mr->num_kpages = pginfo->num_kpages; e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; e_mr->start = iova_start; e_mr->size = size; e_mr->acl = acl; @@ -1268,13 +1350,14 @@ int ehca_rereg_mr(struct ehca_shca *shca, /* set some MR values */ e_mr->flags = save_mr.flags; + e_mr->hwpage_size = save_mr.hwpage_size; 
e_mr->fmr_page_size = save_mr.fmr_page_size; e_mr->fmr_max_pages = save_mr.fmr_max_pages; e_mr->fmr_max_maps = save_mr.fmr_max_maps; e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, - e_pd, pginfo, lkey, rkey); + e_pd, pginfo, lkey, rkey); if (ret) { u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; memcpy(&e_mr->flags, &(save_mr.flags), @@ -1355,6 +1438,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, /* set some MR values */ e_fmr->flags = save_fmr.flags; + e_fmr->hwpage_size = save_fmr.hwpage_size; e_fmr->fmr_page_size = save_fmr.fmr_page_size; e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; @@ -1363,8 +1447,6 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, memset(&pginfo, 0, sizeof(pginfo)); pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_kpages = 0; - pginfo.num_hwpages = 0; ret = ehca_reg_mr(shca, e_fmr, NULL, (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), e_fmr->acl, e_pd, &pginfo, &tmp_lkey, @@ -1373,7 +1455,6 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; memcpy(&e_fmr->flags, &(save_mr.flags), sizeof(struct ehca_mr) - offset); - goto ehca_unmap_one_fmr_exit0; } ehca_unmap_one_fmr_exit0: @@ -1401,7 +1482,7 @@ int ehca_reg_smr(struct ehca_shca *shca, struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, (u64)iova_start, hipz_acl, e_pd->fw_pd, @@ -1420,6 +1501,7 @@ int ehca_reg_smr(struct ehca_shca *shca, /* successful registration */ e_newmr->num_kpages = e_origmr->num_kpages; e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; e_newmr->start = iova_start; e_newmr->size = e_origmr->size; e_newmr->acl = acl; @@ -1452,6 +1534,7 @@ int ehca_reg_internal_maxmr( struct ib_phys_buf ib_pbuf; u32 num_kpages; u32 num_hwpages; + u64 hw_pgsize; e_mr = ehca_mr_new(); if (!e_mr) { @@ -1468,13 +1551,15 @@ int ehca_reg_internal_maxmr( ib_pbuf.size = size_maxmr; num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, PAGE_SIZE); - num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) + size_maxmr, - EHCA_PAGESIZE); + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, + hw_pgsize); memset(&pginfo, 0, sizeof(pginfo)); pginfo.type = EHCA_MR_PGI_PHYS; pginfo.num_kpages = num_kpages; pginfo.num_hwpages = num_hwpages; + pginfo.hwpage_size = hw_pgsize; pginfo.u.phy.num_phys_buf = 1; pginfo.u.phy.phys_buf_array = &ib_pbuf; @@ -1523,7 +1608,7 @@ int ehca_reg_maxmr(struct ehca_shca *shca, struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, (u64)iova_start, hipz_acl, e_pd->fw_pd, @@ -1539,6 +1624,7 @@ int ehca_reg_maxmr(struct ehca_shca *shca, /* successful registration */ e_newmr->num_kpages = e_origmr->num_kpages; e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; e_newmr->start = iova_start; e_newmr->size = e_origmr->size; e_newmr->acl = acl; @@ -1684,6 +1770,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, u64 pgaddr; u32 i = 0; u32 j = 0; + int hwpages_per_kpage = PAGE_SIZE / 
pginfo->hwpage_size; /* loop over desired chunk entries */ chunk = pginfo->u.usr.next_chunk; @@ -1695,7 +1782,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, << PAGE_SHIFT ; *kpage = phys_to_abs(pgaddr + (pginfo->next_hwpage * - EHCA_PAGESIZE)); + pginfo->hwpage_size)); if ( !(*kpage) ) { ehca_gen_err("pgaddr=%lx " "chunk->page_list[i]=%lx " @@ -1708,8 +1795,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, (pginfo->hwpage_cnt)++; (pginfo->next_hwpage)++; kpage++; - if (pginfo->next_hwpage % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { (pginfo->kpage_cnt)++; (pginfo->u.usr.next_nmap)++; pginfo->next_hwpage = 0; @@ -1738,6 +1824,143 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, return ret; } +/* + * check given pages for contiguous layout + * last page addr is returned in prev_pgaddr for further check + */ +static int ehca_check_kpages_per_ate(struct scatterlist *page_list, + int start_idx, int end_idx, + u64 *prev_pgaddr) +{ + int t; + for (t = start_idx; t <= end_idx; t++) { + u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT; + ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr, + *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); + if (pgaddr - PAGE_SIZE != *prev_pgaddr) { + ehca_gen_err("uncontiguous page found pgaddr=%lx " + "prev_pgaddr=%lx page_list_i=%x", + pgaddr, *prev_pgaddr, t); + return -EINVAL; + } + *prev_pgaddr = pgaddr; + } + return 0; +} + +/* PAGE_SIZE < pginfo->hwpage_size */ +static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + struct ib_umem_chunk *prev_chunk; + struct ib_umem_chunk *chunk; + u64 pgaddr, prev_pgaddr; + u32 i = 0; + u32 j = 0; + int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; + int nr_kpages = kpages_per_hwpage; + + /* loop over desired chunk entries */ + chunk = pginfo->u.usr.next_chunk; + prev_chunk = pginfo->u.usr.next_chunk; + list_for_each_entry_continue( + chunk, (&(pginfo->u.usr.region->chunk_list)), list) { + for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { + if (nr_kpages == kpages_per_hwpage) { + pgaddr = ( page_to_pfn(chunk->page_list[i].page) + << PAGE_SHIFT ); + *kpage = phys_to_abs(pgaddr); + if ( !(*kpage) ) { + ehca_gen_err("pgaddr=%lx i=%x", + pgaddr, i); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. 
+ */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%lx i=%x " + "mr_pgsize=%lx", + pgaddr, i, + pginfo->hwpage_size); + ret = -EFAULT; + return ret; + } + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = phys_to_abs( + pgaddr & + ~(pginfo->hwpage_size - 1)); + } + ehca_gen_dbg("kpage=%lx chunk_page=%lx " + "value=%016lx", *kpage, pgaddr, + *(u64 *)abs_to_virt( + phys_to_abs(pgaddr))); + prev_pgaddr = pgaddr; + i++; + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + if (i + nr_kpages > chunk->nmap) { + ret = ehca_check_kpages_per_ate( + chunk->page_list, i, + chunk->nmap - 1, &prev_pgaddr); + if (ret) return ret; + pginfo->kpage_cnt += chunk->nmap - i; + pginfo->u.usr.next_nmap += chunk->nmap - i; + nr_kpages -= chunk->nmap - i; + break; + } + + ret = ehca_check_kpages_per_ate(chunk->page_list, i, + i + nr_kpages - 1, + &prev_pgaddr); + if (ret) return ret; + i += nr_kpages; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; +next_kpage: + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) break; + } + if ((pginfo->u.usr.next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; + } + pginfo->u.usr.next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->u.usr.region->chunk_list)), + list); + return ret; +} + int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, u32 number, u64 *kpage) @@ -1750,9 +1973,10 @@ int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, /* loop over desired phys_buf_array entries */ while (i < number) { pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; - num_hw = NUM_CHUNKS((pbuf->addr % EHCA_PAGESIZE) + - pbuf->size, EHCA_PAGESIZE); - offs_hw = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + + pbuf->size, pginfo->hwpage_size); + offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / + pginfo->hwpage_size; while (pginfo->next_hwpage < offs_hw + num_hw) { /* sanity check */ if ((pginfo->kpage_cnt >= pginfo->num_kpages) || @@ -1768,21 +1992,23 @@ int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, return -EFAULT; } *kpage = phys_to_abs( - (pbuf->addr & EHCA_PAGEMASK) - + (pginfo->next_hwpage * EHCA_PAGESIZE)); + (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size)); if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%lx " - "pbuf->size=%lx " + ehca_gen_err("pbuf->addr=%lx pbuf->size=%lx " "next_hwpage=%lx", pbuf->addr, - pbuf->size, - pginfo->next_hwpage); + pbuf->size, pginfo->next_hwpage); return -EFAULT; } (pginfo->hwpage_cnt)++; (pginfo->next_hwpage)++; - if (pginfo->next_hwpage % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) - (pginfo->kpage_cnt)++; + if (PAGE_SIZE >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (PAGE_SIZE / pginfo->hwpage_size) == 0) + (pginfo->kpage_cnt)++; + } else + pginfo->kpage_cnt += pginfo->hwpage_size / + PAGE_SIZE; kpage++; i++; if (i >= number) break; @@ -1806,8 +2032,8 @@ int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, /* loop over desired page_list entries */ fmrlist = 
pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; for (i = 0; i < number; i++) { - *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + - pginfo->next_hwpage * EHCA_PAGESIZE); + *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size); if ( !(*kpage) ) { ehca_gen_err("*fmrlist=%lx fmrlist=%p " "next_listelem=%lx next_hwpage=%lx", @@ -1817,15 +2043,38 @@ int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, return -EFAULT; } (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % - (pginfo->u.fmr.fmr_pgsize / EHCA_PAGESIZE) == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.fmr.next_listelem)++; - fmrlist++; - pginfo->next_hwpage = 0; + if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (pginfo->u.fmr.fmr_pgsize / + pginfo->hwpage_size) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.fmr.next_listelem)++; + fmrlist++; + pginfo->next_hwpage = 0; + } else + (pginfo->next_hwpage)++; + } else { + unsigned int cnt_per_hwpage = pginfo->hwpage_size / + pginfo->u.fmr.fmr_pgsize; + unsigned int j; + u64 prev = *kpage; + /* check if adrs are contiguous */ + for (j = 1; j < cnt_per_hwpage; j++) { + u64 p = phys_to_abs(fmrlist[j] & + ~(pginfo->hwpage_size - 1)); + if (prev + pginfo->u.fmr.fmr_pgsize != p) { + ehca_gen_err("uncontiguous fmr pages " + "found prev=%lx p=%lx " + "idx=%x", prev, p, i + j); + return -EINVAL; + } + prev = p; + } + pginfo->kpage_cnt += cnt_per_hwpage; + pginfo->u.fmr.next_listelem += cnt_per_hwpage; + fmrlist += cnt_per_hwpage; } + kpage++; } return ret; } @@ -1842,7 +2091,9 @@ int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, ret = ehca_set_pagebuf_phys(pginfo, number, kpage); break; case EHCA_MR_PGI_USER: - ret = ehca_set_pagebuf_user1(pginfo, number, kpage); + ret = PAGE_SIZE >= pginfo->hwpage_size ? + ehca_set_pagebuf_user1(pginfo, number, kpage) : + ehca_set_pagebuf_user2(pginfo, number, kpage); break; case EHCA_MR_PGI_FMR: ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); @@ -1895,9 +2146,9 @@ void ehca_mrmw_map_acl(int ib_acl, /*----------------------------------------------------------------------*/ /* sets page size in hipz access control for MR/MW. 
*/ -void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/ +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ { - return; /* HCA supports only 4k */ + *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); } /* end ehca_mrmw_set_pgsize_hipz_acl() */ /*----------------------------------------------------------------------*/ diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h index 24f13fe3708b..bc8f4e31c123 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -111,7 +111,7 @@ int ehca_mr_is_maxmr(u64 size, void ehca_mrmw_map_acl(int ib_acl, u32 *hipz_acl); -void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl); +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, int *ib_acl); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 3394e05f4b4f..358796ccf008 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -427,7 +427,8 @@ u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, { return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, adapter_handle.handle, /* r4 */ - queue_type | pagesize << 8, /* r5 */ + (u64)queue_type | ((u64)pagesize) << 8, + /* r5 */ resource_handle, /* r6 */ logical_address_of_page, /* r7 */ count, /* r8 */ @@ -724,6 +725,9 @@ u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, u64 ret; u64 outs[PLPAR_HCALL9_BUFSIZE]; + ehca_gen_dbg("kernel PAGE_SIZE=%x access_ctrl=%016x " + "vaddr=%lx length=%lx", + (u32)PAGE_SIZE, access_ctrl, vaddr, length); ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ 5, /* r5 */ @@ -746,8 +750,22 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, const u64 logical_address_of_page, const u64 count) { + extern int ehca_debug_level; u64 ret; + if (unlikely(ehca_debug_level >= 2)) { + if (count > 1) { + u64 *kpage; + int i; + kpage = (u64 *)abs_to_virt(logical_address_of_page); + for (i = 0; i < count; i++) + ehca_gen_dbg("kpage[%d]=%p", + i, (void *)kpage[i]); + } else + ehca_gen_dbg("kpage=%p", + (void *)logical_address_of_page); + } + if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { ehca_gen_err("logical_address_of_page not on a 4k boundary " "adapter_handle=%lx mr=%p mr_handle=%lx " -- cgit v1.2.3 From 633a5aedaee1c96347b8a6c2ae7dceb47d0c910f Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Fri, 20 Jul 2007 16:02:18 +0200 Subject: IB/ehca: Generate async event when SRQ limit reached Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_irq.c | 42 +++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 4fb01fcb63ae..71c0799b3500 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -175,9 +175,8 @@ error_data1: } -static void qp_event_callback(struct ehca_shca *shca, - u64 eqe, - enum ib_event_type event_type) +static void qp_event_callback(struct ehca_shca *shca, u64 eqe, + enum ib_event_type event_type, int fatal) { struct ib_event event; struct ehca_qp *qp; @@ -191,16 +190,26 @@ static void qp_event_callback(struct ehca_shca *shca, if (!qp) return; - ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + if (fatal) + ehca_error_data(shca, 
qp, qp->ipz_qp_handle.handle); - if (!qp->ib_qp.event_handler) - return; + event.device = &shca->ib_device; - event.device = &shca->ib_device; - event.event = event_type; - event.element.qp = &qp->ib_qp; + if (qp->ext_type == EQPT_SRQ) { + if (!qp->ib_srq.event_handler) + return; - qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + event.event = fatal ? IB_EVENT_SRQ_ERR : event_type; + event.element.srq = &qp->ib_srq; + qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); + } else { + if (!qp->ib_qp.event_handler) + return; + + event.event = event_type; + event.element.qp = &qp->ib_qp; + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + } return; } @@ -234,17 +243,17 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) switch (identifier) { case 0x02: /* path migrated */ - qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG); + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); break; case 0x03: /* communication established */ - qp_event_callback(shca, eqe, IB_EVENT_COMM_EST); + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); break; case 0x04: /* send queue drained */ - qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED); + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); break; case 0x05: /* QP error */ case 0x06: /* QP error */ - qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL); + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); break; case 0x07: /* CQ error */ case 0x08: /* CQ error */ @@ -278,6 +287,11 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) ehca_err(&shca->ib_device, "Interface trace stopped."); break; case 0x14: /* first error capture info available */ + ehca_info(&shca->ib_device, "First error capture available"); + break; + case 0x15: /* SRQ limit reached */ + qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); + break; default: ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", identifier, shca->ib_device.name); -- cgit v1.2.3 From 51d2bfbddb33dc59786a3a41f7eeb59e30fa561c Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Fri, 20 Jul 2007 16:02:46 +0200 Subject: IB/ehca: Move ehca2ib_return_code() out of line ehca2ib_return_code() is not used in any fast path, and making it non-inline saves ~1.5K of code. 
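The ~1.5K saving is the usual inline trade-off: a static inline defined in a header is compiled into every translation unit that calls it, while a declaration in the header plus one out-of-line definition leaves a single copy of the body reached through a call. The shape of the conversion, sketched with a hypothetical helper (the real one is ehca2ib_return_code() in the diff below; the status values here are invented):

/* tools.h: declaration only -- callers now emit a call instruction */
int fw_rc_to_errno(unsigned long fw_rc);

/* main.c: the single out-of-line definition */
#include <errno.h>

int fw_rc_to_errno(unsigned long fw_rc)
{
	switch (fw_rc) {
	case 0:
		return 0;	/* success */
	case 1:
		return -EBUSY;	/* resource busy */
	default:
		return -EINVAL;
	}
}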
Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_main.c | 17 +++++++++++++++++ drivers/infiniband/hw/ehca/ehca_tools.h | 19 +------------------ 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index ec0145192228..bb104b7f73e3 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -130,6 +130,23 @@ void ehca_free_fw_ctrlblock(void *ptr) } #endif +int ehca2ib_return_code(u64 ehca_rc) +{ + switch (ehca_rc) { + case H_SUCCESS: + return 0; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: + return -EBUSY; + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_CONSTRAINED: /* resource constraint */ + case H_NO_MEM: + return -ENOMEM; + default: + return -EINVAL; + } +} + static int ehca_create_slab_caches(void) { int ret; diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 678b81391861..57c77a715f46 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -154,24 +154,7 @@ extern int ehca_debug_level; #define EHCA_BMASK_GET(mask, value) \ (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) - /* Converts ehca to ib return code */ -static inline int ehca2ib_return_code(u64 ehca_rc) -{ - switch (ehca_rc) { - case H_SUCCESS: - return 0; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: - return -EBUSY; - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_CONSTRAINED: /* resource constraint */ - case H_NO_MEM: - return -ENOMEM; - default: - return -EINVAL; - } -} - +int ehca2ib_return_code(u64 ehca_rc); #endif /* EHCA_TOOLS_H */ -- cgit v1.2.3 From 0c10f7b79b5bb07a37aa5927072abdc3f45ac8d3 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Thu, 19 Jul 2007 21:40:00 +0200 Subject: IB/ehca: Make internal_create/destroy_qp() static They're only used in ehca_qp.c, so make them static to that file. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_qp.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index a3146e696c5d..483f0ca1acc4 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -363,10 +363,11 @@ init_qp_queue1: * the value of the is_srq parameter. If init_attr and srq_init_attr share * fields, the field out of init_attr is used. */ -struct ehca_qp *internal_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata, int is_srq) +static struct ehca_qp *internal_create_qp( + struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata, int is_srq) { struct ehca_qp *my_qp; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); @@ -752,8 +753,8 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, return IS_ERR(ret) ? 
(struct ib_qp *)ret : &ret->ib_qp; } -int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject); +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject); struct ib_srq *ehca_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, @@ -1669,8 +1670,8 @@ query_srq_exit1: return ret; } -int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject) +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject) { struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, -- cgit v1.2.3 From e2f81daf23efde23d8cac1fc253d41838f0347cf Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Fri, 20 Jul 2007 16:04:17 +0200 Subject: IB/ehca: Support small QP queues eHCA2 supports QP queues that can be as small as 512 bytes. This greatly reduces memory overhead for consumers that use lots of QPs with small queues (e.g. RDMA-only QPs). Apart from dealing with firmware, this code needs to manage bite-sized chunks of kernel pages, making sure that no kernel page is shared between different protection domains. Signed-off-by: Hoang-Nam Nguyen --- drivers/infiniband/hw/ehca/ehca_classes.h | 41 ++++-- drivers/infiniband/hw/ehca/ehca_cq.c | 8 +- drivers/infiniband/hw/ehca/ehca_eq.c | 8 +- drivers/infiniband/hw/ehca/ehca_main.c | 14 +- drivers/infiniband/hw/ehca/ehca_pd.c | 25 +++- drivers/infiniband/hw/ehca/ehca_qp.c | 161 +++++++++++++--------- drivers/infiniband/hw/ehca/ehca_uverbs.c | 2 +- drivers/infiniband/hw/ehca/hcp_if.c | 30 ++-- drivers/infiniband/hw/ehca/ipz_pt_fn.c | 222 +++++++++++++++++++++++------- drivers/infiniband/hw/ehca/ipz_pt_fn.h | 26 +++- 10 files changed, 378 insertions(+), 159 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 63b8b9f7c4fc..3725aa8664d9 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -43,7 +43,6 @@ #ifndef __EHCA_CLASSES_H__ #define __EHCA_CLASSES_H__ - struct ehca_module; struct ehca_qp; struct ehca_cq; @@ -129,6 +128,10 @@ struct ehca_pd { struct ib_pd ib_pd; struct ipz_pd fw_pd; u32 ownpid; + /* small queue mgmt */ + struct mutex lock; + struct list_head free[2]; + struct list_head full[2]; }; enum ehca_ext_qp_type { @@ -307,6 +310,8 @@ int ehca_init_av_cache(void); void ehca_cleanup_av_cache(void); int ehca_init_mrmw_cache(void); void ehca_cleanup_mrmw_cache(void); +int ehca_init_small_qp_cache(void); +void ehca_cleanup_small_qp_cache(void); extern rwlock_t ehca_qp_idr_lock; extern rwlock_t ehca_cq_idr_lock; @@ -324,7 +329,7 @@ struct ipzu_queue_resp { u32 queue_length; /* queue length allocated in bytes */ u32 pagesize; u32 toggle_state; - u32 dummy; /* padding for 8 byte alignment */ + u32 offset; /* save offset within a page for small_qp */ }; struct ehca_create_cq_resp { @@ -366,15 +371,29 @@ enum ehca_ll_comp_flags { LLQP_COMP_MASK = 0x60, }; +struct ehca_alloc_queue_parms { + /* input parameters */ + int max_wr; + int max_sge; + int page_size; + int is_small; + + /* output parameters */ + u16 act_nr_wqes; + u8 act_nr_sges; + u32 queue_size; /* bytes for small queues, pages otherwise */ +}; + struct ehca_alloc_qp_parms { -/* input parameters */ + struct ehca_alloc_queue_parms squeue; + struct ehca_alloc_queue_parms rqueue; + + /* input parameters */ 
enum ehca_service_type servicetype; + int qp_storage; int sigtype; enum ehca_ext_qp_type ext_type; enum ehca_ll_comp_flags ll_comp_flags; - - int max_send_wr, max_recv_wr; - int max_send_sge, max_recv_sge; int ud_av_l_key_ctl; u32 token; @@ -384,18 +403,10 @@ struct ehca_alloc_qp_parms { u32 srq_qpn, srq_token, srq_limit; -/* output parameters */ + /* output parameters */ u32 real_qp_num; struct ipz_qp_handle qp_handle; struct h_galpas galpas; - - u16 act_nr_send_wqes; - u16 act_nr_recv_wqes; - u8 act_nr_recv_sges; - u8 act_nr_send_sges; - - u32 nr_rq_pages; - u32 nr_sq_pages; }; int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 1e8ca3fca4aa..81aff36101ba 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -190,8 +190,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, goto create_cq_exit2; } - ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages, - EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0); + ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, + EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); if (!ipz_rc) { ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p", ipz_rc, device); @@ -285,7 +285,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, return cq; create_cq_exit4: - ipz_queue_dtor(&my_cq->ipz_queue); + ipz_queue_dtor(NULL, &my_cq->ipz_queue); create_cq_exit3: h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); @@ -359,7 +359,7 @@ int ehca_destroy_cq(struct ib_cq *cq) "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); return ehca2ib_return_code(h_ret); } - ipz_queue_dtor(&my_cq->ipz_queue); + ipz_queue_dtor(NULL, &my_cq->ipz_queue); kmem_cache_free(cq_cache, my_cq); return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 4825975f88cf..1d41faa7a337 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -86,8 +86,8 @@ int ehca_create_eq(struct ehca_shca *shca, return -EINVAL; } - ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages, - EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0); + ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, + EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); if (!ret) { ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); goto create_eq_exit1; @@ -145,7 +145,7 @@ int ehca_create_eq(struct ehca_shca *shca, return 0; create_eq_exit2: - ipz_queue_dtor(&eq->ipz_queue); + ipz_queue_dtor(NULL, &eq->ipz_queue); create_eq_exit1: hipz_h_destroy_eq(shca->ipz_hca_handle, eq); @@ -181,7 +181,7 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) ehca_err(&shca->ib_device, "Can't free EQ resources."); return -EINVAL; } - ipz_queue_dtor(&eq->ipz_queue); + ipz_queue_dtor(NULL, &eq->ipz_queue); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index bb104b7f73e3..99036b65bb84 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -181,6 +181,12 @@ static int ehca_create_slab_caches(void) goto create_slab_caches5; } + ret = ehca_init_small_qp_cache(); + if (ret) { + ehca_gen_err("Cannot create small queue SLAB cache."); + goto create_slab_caches6; + } + #ifdef CONFIG_PPC_64K_PAGES ctblk_cache = kmem_cache_create("ehca_cache_ctblk", EHCA_PAGESIZE, H_CB_ALIGNMENT, @@ -188,12 +194,15 @@ static int ehca_create_slab_caches(void) NULL); if 
(!ctblk_cache) { ehca_gen_err("Cannot create ctblk SLAB cache."); - ehca_cleanup_mrmw_cache(); - goto create_slab_caches5; + ehca_cleanup_small_qp_cache(); + goto create_slab_caches6; } #endif return 0; +create_slab_caches6: + ehca_cleanup_mrmw_cache(); + create_slab_caches5: ehca_cleanup_av_cache(); @@ -211,6 +220,7 @@ create_slab_caches2: static void ehca_destroy_slab_caches(void) { + ehca_cleanup_small_qp_cache(); ehca_cleanup_mrmw_cache(); ehca_cleanup_av_cache(); ehca_cleanup_qp_cache(); diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c index c85312ad292b..3dafd7ff36cd 100644 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -49,6 +49,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device, struct ib_ucontext *context, struct ib_udata *udata) { struct ehca_pd *pd; + int i; pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); if (!pd) { @@ -58,6 +59,11 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device, } pd->ownpid = current->tgid; + for (i = 0; i < 2; i++) { + INIT_LIST_HEAD(&pd->free[i]); + INIT_LIST_HEAD(&pd->full[i]); + } + mutex_init(&pd->lock); /* * Kernel PD: when device = -1, 0 @@ -81,6 +87,9 @@ int ehca_dealloc_pd(struct ib_pd *pd) { u32 cur_pid = current->tgid; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + int i, leftovers = 0; + extern struct kmem_cache *small_qp_cache; + struct ipz_small_queue_page *page, *tmp; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && my_pd->ownpid != cur_pid) { @@ -89,8 +98,20 @@ int ehca_dealloc_pd(struct ib_pd *pd) return -EINVAL; } - kmem_cache_free(pd_cache, - container_of(pd, struct ehca_pd, ib_pd)); + for (i = 0; i < 2; i++) { + list_splice(&my_pd->full[i], &my_pd->free[i]); + list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { + leftovers = 1; + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } + } + + if (leftovers) + ehca_warn(pd->device, + "Some small queue pages were not freed"); + + kmem_cache_free(pd_cache, my_pd); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 483f0ca1acc4..b178cba96345 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -275,34 +275,39 @@ static inline void queue2resp(struct ipzu_queue_resp *resp, resp->toggle_state = queue->toggle_state; } -static inline int ll_qp_msg_size(int nr_sge) -{ - return 128 << nr_sge; -} - /* * init_qp_queue initializes/constructs r/squeue and registers queue pages. */ static inline int init_qp_queue(struct ehca_shca *shca, + struct ehca_pd *pd, struct ehca_qp *my_qp, struct ipz_queue *queue, int q_type, u64 expected_hret, - int nr_q_pages, - int wqe_size, - int nr_sges) + struct ehca_alloc_queue_parms *parms, + int wqe_size) { - int ret, cnt, ipz_rc; + int ret, cnt, ipz_rc, nr_q_pages; void *vpage; u64 rpage, h_ret; struct ib_device *ib_dev = &shca->ib_device; struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; - if (!nr_q_pages) + if (!parms->queue_size) return 0; - ipz_rc = ipz_queue_ctor(queue, nr_q_pages, EHCA_PAGESIZE, - wqe_size, nr_sges); + if (parms->is_small) { + nr_q_pages = 1; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + 128 << parms->page_size, + wqe_size, parms->act_nr_sges, 1); + } else { + nr_q_pages = parms->queue_size; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + EHCA_PAGESIZE, wqe_size, + parms->act_nr_sges, 0); + } + if (!ipz_rc) { ehca_err(ib_dev, "Cannot allocate page for queue. 
ipz_rc=%x", ipz_rc); @@ -323,7 +328,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, my_qp->ipz_qp_handle, NULL, 0, q_type, - rpage, 1, + rpage, parms->is_small ? 0 : 1, my_qp->galpas.kernel); if (cnt == (nr_q_pages - 1)) { /* last page! */ if (h_ret != expected_hret) { @@ -354,10 +359,45 @@ static inline int init_qp_queue(struct ehca_shca *shca, return 0; init_qp_queue1: - ipz_queue_dtor(queue); + ipz_queue_dtor(pd, queue); return ret; } +static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp) +{ + if (is_llqp) + return 128 << act_nr_sge; + else + return offsetof(struct ehca_wqe, + u.nud.sg_list[act_nr_sge]); +} + +static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, + int req_nr_sge, int is_llqp) +{ + u32 wqe_size, q_size; + int act_nr_sge = req_nr_sge; + + if (!is_llqp) + /* round up #SGEs so WQE size is a power of 2 */ + for (act_nr_sge = 4; act_nr_sge <= 252; + act_nr_sge = 4 + 2 * act_nr_sge) + if (act_nr_sge >= req_nr_sge) + break; + + wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp); + q_size = wqe_size * (queue->max_wr + 1); + + if (q_size <= 512) + queue->page_size = 2; + else if (q_size <= 1024) + queue->page_size = 3; + else + queue->page_size = 0; + + queue->is_small = (queue->page_size != 0); +} + /* * Create an ib_qp struct that is either a QP or an SRQ, depending on * the value of the is_srq parameter. If init_attr and srq_init_attr share @@ -553,10 +593,20 @@ static struct ehca_qp *internal_create_qp( if (my_qp->recv_cq) parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; - parms.max_send_wr = init_attr->cap.max_send_wr; - parms.max_recv_wr = init_attr->cap.max_recv_wr; - parms.max_send_sge = max_send_sge; - parms.max_recv_sge = max_recv_sge; + parms.squeue.max_wr = init_attr->cap.max_send_wr; + parms.rqueue.max_wr = init_attr->cap.max_recv_wr; + parms.squeue.max_sge = max_send_sge; + parms.rqueue.max_sge = max_recv_sge; + + if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap) + && !(context && udata)) { /* no small QP support in userspace ATM */ + ehca_determine_small_queue( + &parms.squeue, max_send_sge, is_llqp); + ehca_determine_small_queue( + &parms.rqueue, max_recv_sge, is_llqp); + parms.qp_storage = + (parms.squeue.is_small || parms.rqueue.is_small); + } h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); if (h_ret != H_SUCCESS) { @@ -570,50 +620,33 @@ static struct ehca_qp *internal_create_qp( my_qp->ipz_qp_handle = parms.qp_handle; my_qp->galpas = parms.galpas; + swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp); + rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp); + switch (qp_type) { case IB_QPT_RC: - if (!is_llqp) { - swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ - (parms.act_nr_send_sges)]); - rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ - (parms.act_nr_recv_sges)]); - } else { /* for LLQP we need to use msg size, not wqe size */ - swqe_size = ll_qp_msg_size(max_send_sge); - rwqe_size = ll_qp_msg_size(max_recv_sge); - parms.act_nr_send_sges = 1; - parms.act_nr_recv_sges = 1; + if (is_llqp) { + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; } break; - case IB_QPT_UC: - swqe_size = offsetof(struct ehca_wqe, - u.nud.sg_list[parms.act_nr_send_sges]); - rwqe_size = offsetof(struct ehca_wqe, - u.nud.sg_list[parms.act_nr_recv_sges]); - break; - case IB_QPT_UD: case IB_QPT_GSI: case IB_QPT_SMI: + /* UD circumvention */ if (is_llqp) { - swqe_size = ll_qp_msg_size(parms.act_nr_send_sges); - rwqe_size = 
ll_qp_msg_size(parms.act_nr_recv_sges); - parms.act_nr_send_sges = 1; - parms.act_nr_recv_sges = 1; + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; } else { - /* UD circumvention */ - parms.act_nr_send_sges -= 2; - parms.act_nr_recv_sges -= 2; - swqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[ - parms.act_nr_send_sges]); - rwqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[ - parms.act_nr_recv_sges]); + parms.squeue.act_nr_sges -= 2; + parms.rqueue.act_nr_sges -= 2; } if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { - parms.act_nr_send_wqes = init_attr->cap.max_send_wr; - parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; - parms.act_nr_send_sges = init_attr->cap.max_send_sge; - parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; + parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr; + parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr; + parms.squeue.act_nr_sges = init_attr->cap.max_send_sge; + parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge; ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; } @@ -626,10 +659,9 @@ static struct ehca_qp *internal_create_qp( /* initialize r/squeue and register queue pages */ if (HAS_SQ(my_qp)) { ret = init_qp_queue( - shca, my_qp, &my_qp->ipz_squeue, 0, + shca, my_pd, my_qp, &my_qp->ipz_squeue, 0, HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS, - parms.nr_sq_pages, swqe_size, - parms.act_nr_send_sges); + &parms.squeue, swqe_size); if (ret) { ehca_err(pd->device, "Couldn't initialize squeue " "and pages ret=%x", ret); @@ -639,9 +671,8 @@ static struct ehca_qp *internal_create_qp( if (HAS_RQ(my_qp)) { ret = init_qp_queue( - shca, my_qp, &my_qp->ipz_rqueue, 1, - H_SUCCESS, parms.nr_rq_pages, rwqe_size, - parms.act_nr_recv_sges); + shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1, + H_SUCCESS, &parms.rqueue, rwqe_size); if (ret) { ehca_err(pd->device, "Couldn't initialize rqueue " "and pages ret=%x", ret); @@ -671,10 +702,10 @@ static struct ehca_qp *internal_create_qp( } init_attr->cap.max_inline_data = 0; /* not supported yet */ - init_attr->cap.max_recv_sge = parms.act_nr_recv_sges; - init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes; - init_attr->cap.max_send_sge = parms.act_nr_send_sges; - init_attr->cap.max_send_wr = parms.act_nr_send_wqes; + init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges; + init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes; + init_attr->cap.max_send_sge = parms.squeue.act_nr_sges; + init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; my_qp->init_attr = *init_attr; /* NOTE: define_apq0() not supported yet */ @@ -708,6 +739,8 @@ static struct ehca_qp *internal_create_qp( resp.ext_type = my_qp->ext_type; resp.qkey = my_qp->qkey; resp.real_qp_num = my_qp->real_qp_num; + resp.ipz_rqueue.offset = my_qp->ipz_rqueue.offset; + resp.ipz_squeue.offset = my_qp->ipz_squeue.offset; if (HAS_SQ(my_qp)) queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); if (HAS_RQ(my_qp)) @@ -724,11 +757,11 @@ static struct ehca_qp *internal_create_qp( create_qp_exit4: if (HAS_RQ(my_qp)) - ipz_queue_dtor(&my_qp->ipz_rqueue); + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); create_qp_exit3: if (HAS_SQ(my_qp)) - ipz_queue_dtor(&my_qp->ipz_squeue); + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); create_qp_exit2: hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); @@ -1735,9 +1768,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, } if (HAS_RQ(my_qp)) - ipz_queue_dtor(&my_qp->ipz_rqueue); + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); if (HAS_SQ(my_qp)) - ipz_queue_dtor(&my_qp->ipz_squeue); + 
ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); kmem_cache_free(qp_cache, my_qp); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 05c415744e3b..4bc687fdf531 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -149,7 +149,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, ehca_gen_err("vm_insert_page() failed rc=%x", ret); return ret; } - start += PAGE_SIZE; + start += PAGE_SIZE; } vma->vm_private_data = mm_count; (*mm_count)++; diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 358796ccf008..fdbfebea7d11 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -52,10 +52,13 @@ #define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) #define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) #define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) +#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17) #define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) #define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) #define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) #define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) +#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35) +#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39) #define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) #define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) @@ -299,6 +302,11 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) + | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE, + parms->squeue.page_size) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE, + parms->rqueue.page_size) | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, !!(parms->ll_comp_flags & LLQP_RECV_COMP)) | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, @@ -309,13 +317,13 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, max_r10_reg = EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, - parms->max_send_wr + 1) + parms->squeue.max_wr + 1) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, - parms->max_recv_wr + 1) + parms->rqueue.max_wr + 1) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, - parms->max_send_sge) + parms->squeue.max_sge) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, - parms->max_recv_sge); + parms->rqueue.max_sge); r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); @@ -335,17 +343,17 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, parms->qp_handle.handle = outs[0]; parms->real_qp_num = (u32)outs[1]; - parms->act_nr_send_wqes = + parms->squeue.act_nr_wqes = (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); - parms->act_nr_recv_wqes = + parms->rqueue.act_nr_wqes = (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); - parms->act_nr_send_sges = + parms->squeue.act_nr_sges = (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); - parms->act_nr_recv_sges = + parms->rqueue.act_nr_sges = (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); - parms->nr_sq_pages = + parms->squeue.queue_size = (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); - parms->nr_rq_pages = + parms->rqueue.queue_size = 
(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); if (ret == H_SUCCESS) @@ -497,7 +505,7 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, const u64 count, const struct h_galpa galpa) { - if (count != 1) { + if (count > 1) { ehca_gen_err("Page counter=%lx", count); return H_PARAMETER; } diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index 9606f13ed092..a090c679c397 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -40,6 +40,11 @@ #include "ehca_tools.h" #include "ipz_pt_fn.h" +#include "ehca_classes.h" + +#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) + +struct kmem_cache *small_qp_cache; void *ipz_qpageit_get_inc(struct ipz_queue *queue) { @@ -49,7 +54,7 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue) queue->current_q_offset -= queue->pagesize; ret = NULL; } - if (((u64)ret) % EHCA_PAGESIZE) { + if (((u64)ret) % queue->pagesize) { ehca_gen_err("ERROR!! not at PAGE-Boundary"); return NULL; } @@ -83,80 +88,195 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) return -EINVAL; } -int ipz_queue_ctor(struct ipz_queue *queue, - const u32 nr_of_pages, - const u32 pagesize, const u32 qe_size, const u32 nr_of_sg) +#if PAGE_SHIFT < EHCA_PAGESHIFT +#error Kernel pages must be at least as large than eHCA pages (4K) ! +#endif + +/* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hca queue pages + */ +static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) { - int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; - int f; + int k, f = 0; + u8 *kpage; - if (pagesize > PAGE_SIZE) { - ehca_gen_err("FATAL ERROR: pagesize=%x is greater " - "than kernel page size", pagesize); - return 0; - } - if (!pages_per_kpage) { - ehca_gen_err("FATAL ERROR: invalid kernel page size. " - "pages_per_kpage=%x", pages_per_kpage); - return 0; - } - queue->queue_length = nr_of_pages * pagesize; - queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); - if (!queue->queue_pages) { - ehca_gen_err("ERROR!! 
didn't get the memory"); - return 0; - } - memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); - /* - * allocate pages for queue: - * outer loop allocates whole kernel pages (page aligned) and - * inner loop divides a kernel page into smaller hca queue pages - */ - f = 0; while (f < nr_of_pages) { - u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL); - int k; + kpage = (u8 *)get_zeroed_page(GFP_KERNEL); if (!kpage) - goto ipz_queue_ctor_exit0; /*NOMEM*/ - for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) { - (queue->queue_pages)[f] = (struct ipz_page *)kpage; + goto out; + + for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { + queue->queue_pages[f] = (struct ipz_page *)kpage; kpage += EHCA_PAGESIZE; f++; } } + return 1; - queue->current_q_offset = 0; +out: + for (f = 0; f < nr_of_pages && queue->queue_pages[f]; + f += PAGES_PER_KPAGE) + free_page((unsigned long)(queue->queue_pages)[f]); + return 0; +} + +static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page; + unsigned long bit; + + mutex_lock(&pd->lock); + + if (!list_empty(&pd->free[order])) + page = list_entry(pd->free[order].next, + struct ipz_small_queue_page, list); + else { + page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); + if (!page) + goto out; + + page->page = get_zeroed_page(GFP_KERNEL); + if (!page->page) { + kmem_cache_free(small_qp_cache, page); + goto out; + } + + list_add(&page->list, &pd->free[order]); + } + + bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); + __set_bit(bit, page->bitmap); + page->fill++; + + if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) + list_move(&page->list, &pd->full[order]); + + mutex_unlock(&pd->lock); + + queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); + queue->small_page = page; + return 1; + +out: + ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + return 0; +} + +static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page = queue->small_page; + unsigned long bit; + int free_page = 0; + + bit = ((unsigned long)queue->queue_pages[0] & PAGE_MASK) + >> (order + 9); + + mutex_lock(&pd->lock); + + __clear_bit(bit, page->bitmap); + page->fill--; + + if (page->fill == 0) { + list_del(&page->list); + free_page = 1; + } + + if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) + /* the page was full until we freed the chunk */ + list_move_tail(&page->list, &pd->free[order]); + + mutex_unlock(&pd->lock); + + if (free_page) { + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } +} + +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small) +{ + if (pagesize > PAGE_SIZE) { + ehca_gen_err("FATAL ERROR: pagesize=%x " + "is greater than kernel page size", pagesize); + return 0; + } + + /* init queue fields */ + queue->queue_length = nr_of_pages * pagesize; + queue->pagesize = pagesize; queue->qe_size = qe_size; queue->act_nr_of_sg = nr_of_sg; - queue->pagesize = pagesize; + queue->current_q_offset = 0; queue->toggle_state = 1; - return 1; + queue->small_page = NULL; - ipz_queue_ctor_exit0: - ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x", - queue, f, nr_of_pages); - for (f = 0; f < nr_of_pages; f += pages_per_kpage) { - if (!(queue->queue_pages)[f]) - break; - 
free_page((unsigned long)(queue->queue_pages)[f]); + /* allocate queue page pointers */ + queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; } + memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); + + /* allocate actual queue pages */ + if (is_small) { + if (!alloc_small_queue_page(queue, pd)) + goto ipz_queue_ctor_exit0; + } else + if (!alloc_queue_pages(queue, nr_of_pages)) + goto ipz_queue_ctor_exit0; + + return 1; + +ipz_queue_ctor_exit0: + ehca_gen_err("Couldn't alloc pages queue=%p " + "nr_of_pages=%x", queue, nr_of_pages); + vfree(queue->queue_pages); + return 0; } -int ipz_queue_dtor(struct ipz_queue *queue) +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) { - int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; - int g; - int nr_pages; + int i, nr_pages; if (!queue || !queue->queue_pages) { ehca_gen_dbg("queue or queue_pages is NULL"); return 0; } - nr_pages = queue->queue_length / queue->pagesize; - for (g = 0; g < nr_pages; g += pages_per_kpage) - free_page((unsigned long)(queue->queue_pages)[g]); + + if (queue->small_page) + free_small_queue_page(queue, pd); + else { + nr_pages = queue->queue_length / queue->pagesize; + for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) + free_page((unsigned long)queue->queue_pages[i]); + } + vfree(queue->queue_pages); return 1; } + +int ehca_init_small_qp_cache(void) +{ + small_qp_cache = kmem_cache_create("ehca_cache_small_qp", + sizeof(struct ipz_small_queue_page), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!small_qp_cache) + return -ENOMEM; + + return 0; +} + +void ehca_cleanup_small_qp_cache(void) +{ + kmem_cache_destroy(small_qp_cache); +} diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 39a4f64aff41..c6937a044e8a 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -51,11 +51,25 @@ #include "ehca_tools.h" #include "ehca_qes.h" +struct ehca_pd; +struct ipz_small_queue_page; + /* struct generic ehca page */ struct ipz_page { u8 entries[EHCA_PAGESIZE]; }; +#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) + +struct ipz_small_queue_page { + unsigned long page; + unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; + int fill; + void *mapped_addr; + u32 mmap_count; + struct list_head list; +}; + /* struct generic queue in linux kernel virtual memory (kv) */ struct ipz_queue { u64 current_q_offset; /* current queue entry */ @@ -66,7 +80,8 @@ struct ipz_queue { u32 queue_length; /* queue length allocated in bytes */ u32 pagesize; u32 toggle_state; /* toggle flag - per page */ - u32 dummy3; /* 64 bit alignment */ + u32 offset; /* save offset within page for small_qp */ + struct ipz_small_queue_page *small_page; }; /* @@ -188,9 +203,10 @@ struct ipz_qpt { * see ipz_qpt_ctor() * returns true if ok, false if out of memory */ -int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages, - const u32 pagesize, const u32 qe_size, - const u32 nr_of_sg); +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small); /* * destructor for a ipz_queue_t @@ -198,7 +214,7 @@ int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages, * see ipz_queue_ctor() * returns true if ok, false if queue was NULL-ptr of free failed */ -int ipz_queue_dtor(struct ipz_queue *queue); +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue 
*queue); /* * constructor for a ipz_qpt_t, -- cgit v1.2.3
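
The small-QP-queues patch above packs multiple 512- or 1024-byte queues into single kernel pages, tracked per protection domain so that no kernel page is ever shared between PDs: each PD keeps a free list of partially used pages per chunk order, and a per-page bitmap records which (512 << order)-byte chunks are handed out. The following standalone program is a simplified userspace model of the scheme in alloc_small_queue_page() -- a sketch, not the driver code; the names (small_page, free_list, alloc_small_chunk) and the fixed 4096-byte page size are assumptions made here for illustration, and the per-PD mutex and full list are condensed into unlinking a page once it fills up.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define KPAGE_SIZE	4096	/* stand-in for the kernel's PAGE_SIZE */
#define CHUNK_SHIFT	9	/* smallest queue chunk is 512 bytes */
#define SPAGE_PER_KPAGE	(KPAGE_SIZE >> CHUNK_SHIFT)	/* 8 chunks per page */

/* one partially used page, modeled after struct ipz_small_queue_page */
struct small_page {
	unsigned char *page;		/* the whole page-sized buffer */
	unsigned int bitmap;		/* one bit per (512 << order)-byte chunk */
	int fill;			/* chunks currently handed out */
	struct small_page *next;	/* free-list link */
};

/* per-"PD" free lists, one per chunk order (0: 512 B, 1: 1024 B) */
static struct small_page *free_list[2];

static void *alloc_small_chunk(int order)
{
	int nr_chunks = SPAGE_PER_KPAGE >> order;
	struct small_page *sp = free_list[order];
	int bit;

	if (!sp) {	/* no partially used page: start a new one */
		sp = calloc(1, sizeof(*sp));
		if (!sp)
			return NULL;
		sp->page = aligned_alloc(KPAGE_SIZE, KPAGE_SIZE);
		if (!sp->page) {
			free(sp);
			return NULL;
		}
		memset(sp->page, 0, KPAGE_SIZE);	/* like get_zeroed_page() */
		sp->next = free_list[order];
		free_list[order] = sp;
	}

	/* find_first_zero_bit() equivalent over the chunk bitmap */
	for (bit = 0; bit < nr_chunks; bit++)
		if (!(sp->bitmap & (1u << bit)))
			break;

	sp->bitmap |= 1u << bit;
	if (++sp->fill == nr_chunks)	/* page is now full: unlink it */
		free_list[order] = sp->next;

	/* chunk address = page base | (bit << (order + 9)), as in the patch */
	return sp->page + ((size_t)bit << (order + CHUNK_SHIFT));
}

int main(void)
{
	char *a = alloc_small_chunk(0);
	char *b = alloc_small_chunk(0);

	if (!a || !b)
		return 1;
	/* two 512-byte queues share one kernel page, 512 bytes apart */
	printf("delta = %td\n", b - a);	/* prints 512 */
	return 0;
}

Freeing reverses the address encoding: since every chunk starts at page_base | (bit << (order + 9)), the chunk index is recovered from the offset within the page, (addr & ~PAGE_MASK) >> (order + 9) -- which is why free_small_queue_page() masks with ~PAGE_MASK, clears the bitmap bit, and moves the page from the full list back to the free list (or releases it entirely once fill drops to zero).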