diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4')
26 files changed, 1553 insertions, 434 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 963dd7e6d547..0c51c69f802f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -592,7 +592,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->nbufs = 1; buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, + buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev, size, &t, gfp); if (!buf->direct.buf) return -ENOMEM; @@ -619,7 +619,8 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = - dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &t, gfp); if (!buf->page_list[i].buf) goto err_free; @@ -657,15 +658,17 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) int i; if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + dma_free_coherent(&dev->persist->pdev->dev, size, + buf->direct.buf, buf->direct.map); else { - if (BITS_PER_LONG == 64 && buf->direct.buf) + if (BITS_PER_LONG == 64) vunmap(buf->direct.buf); for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); @@ -738,7 +741,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp); + pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp); if (!pgdir) { ret = -ENOMEM; goto out; @@ -775,7 +778,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) set_bit(i, db->u.pgdir->bits[o]); if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); kfree(db->u.pgdir); diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 9c656fe4983d..715de8affcc9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -40,16 +40,177 @@ enum { MLX4_CATAS_POLL_INTERVAL = 5 * HZ, }; -static DEFINE_SPINLOCK(catas_lock); -static LIST_HEAD(catas_list); -static struct work_struct catas_work; -static int internal_err_reset = 1; -module_param(internal_err_reset, int, 0644); +int mlx4_internal_err_reset = 1; +module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, - "Reset device on internal errors if non-zero" - " (default 1, in SRIOV mode default is 0)"); + "Reset device on internal errors if non-zero (default 1)"); + +static int read_vendor_id(struct mlx4_dev *dev) +{ + u16 vendor_id = 0; + int ret; + + ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id); + if (ret) { + mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret); + return ret; + } + + if (vendor_id == 0xffff) { + mlx4_err(dev, "PCI can't be accessed to read vendor id\n"); + return -EINVAL; + } + + return 0; +} + +static int mlx4_reset_master(struct mlx4_dev *dev) +{ + int err = 0; + + if (mlx4_is_master(dev)) + mlx4_report_internal_err_comm_event(dev); + + if (!pci_channel_offline(dev->persist->pdev)) { + err = read_vendor_id(dev); + /* If PCI can't be accessed to read vendor ID we assume that its + * link was disabled and chip was already reset. + */ + if (err) + return 0; + + err = mlx4_reset(dev); + if (err) + mlx4_err(dev, "Fail to reset HCA\n"); + } + + return err; +} + +static int mlx4_reset_slave(struct mlx4_dev *dev) +{ +#define COM_CHAN_RST_REQ_OFFSET 0x10 +#define COM_CHAN_RST_ACK_OFFSET 0x08 + + u32 comm_flags; + u32 rst_req; + u32 rst_ack; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (pci_channel_offline(dev->persist->pdev)) + return 0; + + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + if (comm_flags == 0xffffffff) { + mlx4_err(dev, "VF reset is not needed\n"); + return 0; + } + + if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) { + mlx4_err(dev, "VF reset is not supported\n"); + return -EOPNOTSUPP; + } + + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + if (rst_req != rst_ack) { + mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n"); + return -EIO; + } + + rst_req ^= 1; + mlx4_warn(dev, "VF is sending reset request to Firmware\n"); + comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET; + __raw_writel((__force u32)cpu_to_be32(comm_flags), + (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + + end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + + /* Reading rst_req again since the communication channel can + * be reset at any time by the PF and all its bits will be + * set to zero. + */ + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + + if (rst_ack == rst_req) { + mlx4_warn(dev, "VF Reset succeed\n"); + return 0; + } + cond_resched(); + } + mlx4_err(dev, "Fail to send reset over the communication channel\n"); + return -ETIMEDOUT; +} + +static int mlx4_comm_internal_err(u32 slave_read) +{ + return (u32)COMM_CHAN_EVENT_INTERNAL_ERR == + (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0; +} + +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) +{ + int err; + struct mlx4_dev *dev; + + if (!mlx4_internal_err_reset) + return; + + mutex_lock(&persist->device_state_mutex); + if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + dev = persist->dev; + mlx4_err(dev, "device is going to be reset\n"); + if (mlx4_is_slave(dev)) + err = mlx4_reset_slave(dev); + else + err = mlx4_reset_master(dev); + BUG_ON(err != 0); + + dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; + mlx4_err(dev, "device was reset successfully\n"); + mutex_unlock(&persist->device_state_mutex); + + /* At that step HW was already reset, now notify clients */ + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + mlx4_cmd_wake_completions(dev); + return; + +out: + mutex_unlock(&persist->device_state_mutex); +} + +static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) +{ + int err = 0; + + mlx4_enter_error_state(persist); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP && + !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) { + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", + err); + } + mutex_unlock(&persist->interface_state_mutex); +} static void dump_err_buf(struct mlx4_dev *dev) { @@ -67,58 +228,40 @@ static void poll_catas(unsigned long dev_ptr) { struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; struct mlx4_priv *priv = mlx4_priv(dev); + u32 slave_read; - if (readl(priv->catas_err.map)) { - /* If the device is off-line, we cannot try to recover it */ - if (pci_channel_offline(dev->pdev)) - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); - else { - dump_err_buf(dev); - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - - if (internal_err_reset) { - spin_lock(&catas_lock); - list_add(&priv->catas_err.list, &catas_list); - spin_unlock(&catas_lock); - - queue_work(mlx4_wq, &catas_work); - } + if (mlx4_is_slave(dev)) { + slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); + if (mlx4_comm_internal_err(slave_read)) { + mlx4_warn(dev, "Internal error detected on the communication channel\n"); + goto internal_err; } - } else - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + } else if (readl(priv->catas_err.map)) { + dump_err_buf(dev); + goto internal_err; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mlx4_warn(dev, "Internal error mark was detected on device\n"); + goto internal_err; + } + + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + return; + +internal_err: + if (mlx4_internal_err_reset) + queue_work(dev->persist->catas_wq, &dev->persist->catas_work); } static void catas_reset(struct work_struct *work) { - struct mlx4_priv *priv, *tmppriv; - struct mlx4_dev *dev; + struct mlx4_dev_persistent *persist = + container_of(work, struct mlx4_dev_persistent, + catas_work); - LIST_HEAD(tlist); - int ret; - - spin_lock_irq(&catas_lock); - list_splice_init(&catas_list, &tlist); - spin_unlock_irq(&catas_lock); - - list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { - struct pci_dev *pdev = priv->dev.pdev; - - /* If the device is off-line, we cannot reset it */ - if (pci_channel_offline(pdev)) - continue; - - ret = mlx4_restart_one(priv->dev.pdev); - /* 'priv' now is not valid */ - if (ret) - pr_err("mlx4 %s: Reset failed (%d)\n", - pci_name(pdev), ret); - else { - dev = pci_get_drvdata(pdev); - mlx4_dbg(dev, "Reset succeeded\n"); - } - } + mlx4_handle_error_state(persist); } void mlx4_start_catas_poll(struct mlx4_dev *dev) @@ -126,22 +269,21 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); phys_addr_t addr; - /*If we are in SRIOV the default of the module param must be 0*/ - if (mlx4_is_mfunc(dev)) - internal_err_reset = 0; - INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); priv->catas_err.map = NULL; - addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + - priv->fw.catas_offset; + if (!mlx4_is_slave(dev)) { + addr = pci_resource_start(dev->persist->pdev, + priv->fw.catas_bar) + + priv->fw.catas_offset; - priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); - if (!priv->catas_err.map) { - mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", - (unsigned long long) addr); - return; + priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", + (unsigned long long)addr); + return; + } } priv->catas_err.timer.data = (unsigned long) dev; @@ -157,15 +299,29 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) del_timer_sync(&priv->catas_err.timer); - if (priv->catas_err.map) + if (priv->catas_err.map) { iounmap(priv->catas_err.map); + priv->catas_err.map = NULL; + } - spin_lock_irq(&catas_lock); - list_del(&priv->catas_err.list); - spin_unlock_irq(&catas_lock); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION) + flush_workqueue(dev->persist->catas_wq); } -void __init mlx4_catas_init(void) +int mlx4_catas_init(struct mlx4_dev *dev) { - INIT_WORK(&catas_work, catas_reset); + INIT_WORK(&dev->persist->catas_work, catas_reset); + dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health"); + if (!dev->persist->catas_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_catas_end(struct mlx4_dev *dev) +{ + if (dev->persist->catas_wq) { + destroy_workqueue(dev->persist->catas_wq); + dev->persist->catas_wq = NULL; + } } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5c93d1451c44..a681d7c0bb9f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -42,6 +42,7 @@ #include <linux/mlx4/device.h> #include <linux/semaphore.h> #include <rdma/ib_smi.h> +#include <linux/delay.h> #include <asm/io.h> @@ -182,6 +183,72 @@ static u8 mlx4_errno_to_status(int errno) } } +static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op, + u8 op_modifier) +{ + switch (op) { + case MLX4_CMD_UNMAP_ICM: + case MLX4_CMD_UNMAP_ICM_AUX: + case MLX4_CMD_UNMAP_FA: + case MLX4_CMD_2RST_QP: + case MLX4_CMD_HW2SW_EQ: + case MLX4_CMD_HW2SW_CQ: + case MLX4_CMD_HW2SW_SRQ: + case MLX4_CMD_HW2SW_MPT: + case MLX4_CMD_CLOSE_HCA: + case MLX4_QP_FLOW_STEERING_DETACH: + case MLX4_CMD_FREE_RES: + case MLX4_CMD_CLOSE_PORT: + return CMD_STAT_OK; + + case MLX4_CMD_QP_ATTACH: + /* On Detach case return success */ + if (op_modifier == 0) + return CMD_STAT_OK; + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + + default: + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + } +} + +static int mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status) +{ + /* Any error during the closing commands below is considered fatal */ + if (op == MLX4_CMD_CLOSE_HCA || + op == MLX4_CMD_HW2SW_EQ || + op == MLX4_CMD_HW2SW_CQ || + op == MLX4_CMD_2RST_QP || + op == MLX4_CMD_HW2SW_SRQ || + op == MLX4_CMD_SYNC_TPT || + op == MLX4_CMD_UNMAP_ICM || + op == MLX4_CMD_UNMAP_ICM_AUX || + op == MLX4_CMD_UNMAP_FA) + return 1; + /* Error on MLX4_CMD_HW2SW_MPT is fatal except when fw status equals + * CMD_STAT_REG_BOUND. + * This status indicates that memory region has memory windows bound to it + * which may result from invalid user space usage and is not fatal. + */ + if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND) + return 1; + return 0; +} + +static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier, + int err) +{ + /* Only if reset flow is really active return code is based on + * command, otherwise current error code is returned. + */ + if (mlx4_internal_err_reset) { + mlx4_enter_error_state(dev->persist); + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + } + + return err; +} + static int comm_pending(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -190,16 +257,30 @@ static int comm_pending(struct mlx4_dev *dev) return (swab32(status) >> 31) != priv->cmd.comm_toggle; } -static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) +static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) { struct mlx4_priv *priv = mlx4_priv(dev); u32 val; + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the function was rest, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + mutex_lock(&dev->persist->device_state_mutex); + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mutex_unlock(&dev->persist->device_state_mutex); + return -EIO; + } + priv->cmd.comm_toggle ^= 1; val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31); __raw_writel((__force u32) cpu_to_be32(val), &priv->mfunc.comm->slave_write); mmiowb(); + mutex_unlock(&dev->persist->device_state_mutex); + return 0; } static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, @@ -219,7 +300,13 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, /* Write command */ down(&priv->cmd.poll_sem); - mlx4_comm_cmd_post(dev, cmd, param); + if (mlx4_comm_cmd_post(dev, cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } end = msecs_to_jiffies(timeout) + jiffies; while (comm_pending(dev) && time_before(jiffies, end)) @@ -231,18 +318,23 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, * is MLX4_DELAY_RESET_SLAVE*/ if ((MLX4_COMM_CMD_RESET == cmd)) { err = MLX4_DELAY_RESET_SLAVE; + goto out; } else { - mlx4_warn(dev, "Communication channel timed out\n"); - err = -ETIMEDOUT; + mlx4_warn(dev, "Communication channel command 0x%x timed out\n", + cmd); + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); } } + if (err) + mlx4_enter_error_state(dev->persist); +out: up(&priv->cmd.poll_sem); return err; } -static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, - u16 param, unsigned long timeout) +static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 vhcr_cmd, + u16 param, u16 op, unsigned long timeout) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; @@ -258,34 +350,49 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, cmd->free_head = context->next; spin_unlock(&cmd->context_lock); - init_completion(&context->done); + reinit_completion(&context->done); - mlx4_comm_cmd_post(dev, op, param); + if (mlx4_comm_cmd_post(dev, vhcr_cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { - mlx4_warn(dev, "communication channel command 0x%x timed out\n", - op); - err = -EBUSY; - goto out; + mlx4_warn(dev, "communication channel command 0x%x (op=0x%x) timed out\n", + vhcr_cmd, op); + goto out_reset; } err = context->result; if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", - op, context->fw_status); - goto out; + vhcr_cmd, context->fw_status); + if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; } -out: /* wait for comm channel ready * this is necessary for prevention the race * when switching between event to polling mode + * Skipping this section in case the device is in FATAL_ERROR state, + * In this state, no commands are sent via the comm channel until + * the device has returned from reset. */ - end = msecs_to_jiffies(timeout) + jiffies; - while (comm_pending(dev) && time_before(jiffies, end)) - cond_resched(); + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { + end = msecs_to_jiffies(timeout) + jiffies; + while (comm_pending(dev) && time_before(jiffies, end)) + cond_resched(); + } + goto out; +out_reset: + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + mlx4_enter_error_state(dev->persist); +out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; cmd->free_head = context - cmd->context; @@ -296,10 +403,13 @@ out: } int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, - unsigned long timeout) + u16 op, unsigned long timeout) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + if (mlx4_priv(dev)->cmd.use_events) - return mlx4_comm_cmd_wait(dev, cmd, param, timeout); + return mlx4_comm_cmd_wait(dev, cmd, param, op, timeout); return mlx4_comm_cmd_poll(dev, cmd, param, timeout); } @@ -307,7 +417,7 @@ static int cmd_pending(struct mlx4_dev *dev) { u32 status; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); @@ -323,17 +433,21 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; u32 __iomem *hcr = cmd->hcr; - int ret = -EAGAIN; + int ret = -EIO; unsigned long end; - mutex_lock(&cmd->hcr_mutex); - - if (pci_channel_offline(dev->pdev)) { + mutex_lock(&dev->persist->device_state_mutex); + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the chip was reset, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + if (pci_channel_offline(dev->persist->pdev) || + (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { /* * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -342,12 +456,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); while (cmd_pending(dev)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -391,7 +504,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, ret = 0; out: - mutex_unlock(&cmd->hcr_mutex); + if (ret) + mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n", + op, ret, in_param, in_modifier, op_modifier); + mutex_unlock(&dev->persist->device_state_mutex); + return ret; } @@ -428,8 +545,11 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } ret = mlx4_status_to_errno(vhcr->status); } + if (ret && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, op_modifier); } else { - ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, + ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, op, MLX4_COMM_TIME + timeout); if (!ret) { if (out_is_imm) { @@ -443,9 +563,14 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } } ret = mlx4_status_to_errno(vhcr->status); - } else - mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", - op); + } else { + if (dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, + op_modifier); + else + mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", op); + } } mutex_unlock(&priv->cmd.slave_cmd_mutex); @@ -464,12 +589,12 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, down(&priv->cmd.poll_sem); - if (pci_channel_offline(dev->pdev)) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { /* * Device is going through error recovery * and cannot accept commands. */ - err = -EIO; + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -483,16 +608,21 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) - goto out; + goto out_reset; end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ err = -EIO; + goto out_reset; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -502,8 +632,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (cmd_pending(dev)) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -ETIMEDOUT; - goto out; + err = -EIO; + goto out_reset; } if (out_is_imm) @@ -515,10 +645,17 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; err = mlx4_status_to_errno(stat); - if (err) + if (err) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, stat); + if (mlx4_closing_cmd_fatal_error(op, stat)) + goto out_reset; + goto out; + } +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: up(&priv->cmd.poll_sem); return err; @@ -565,17 +702,19 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, goto out; } - init_completion(&context->done); + reinit_completion(&context->done); - mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, - in_modifier, op_modifier, op, context->token, 1); + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + in_modifier, op_modifier, op, context->token, 1); + if (err) + goto out_reset; if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -EBUSY; - goto out; + err = -EIO; + goto out_reset; } err = context->result; @@ -592,12 +731,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, else mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + else if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; + goto out; } if (out_is_imm) *out_param = context->out_param; +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; @@ -612,10 +759,13 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout, int native) { - if (pci_channel_offline(dev->pdev)) - return -EIO; + if (pci_channel_offline(dev->persist->pdev)) + return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_internal_err_ret_value(dev, op, + op_modifier); if (mlx4_priv(dev)->cmd.use_events) return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm, in_modifier, @@ -631,7 +781,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, EXPORT_SYMBOL_GPL(__mlx4_cmd); -static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); @@ -751,7 +901,9 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, index = be32_to_cpu(smp->attr_mod); if (port < 1 || port > dev->caps.num_ports) return -EINVAL; - table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL); + table = kcalloc((dev->caps.pkey_table_len[port] / 32) + 1, + sizeof(*table) * 32, GFP_KERNEL); + if (!table) return -ENOMEM; /* need to get the full pkey table because the paravirtualized @@ -1071,7 +1223,7 @@ static struct mlx4_cmd_info cmd_info[] = { { .opcode = MLX4_CMD_HW2SW_EQ, .has_inbox = false, - .has_outbox = true, + .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, @@ -1431,6 +1583,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_CMD_EPERM_wrapper }, + { + .opcode = MLX4_CMD_VIRT_PORT_MAP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, @@ -1460,8 +1621,10 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, ALIGN(sizeof(struct mlx4_vhcr_cmd), MLX4_ACCESS_MEM_ALIGN), 1); if (ret) { - mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n", - __func__, ret); + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n", + __func__, ret); kfree(vhcr); return ret; } @@ -1500,11 +1663,14 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, goto out_status; } - if (mlx4_ACCESS_MEM(dev, inbox->dma, slave, - vhcr->in_param, - MLX4_MAILBOX_SIZE, 1)) { - mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", - __func__, cmd->opcode); + ret = mlx4_ACCESS_MEM(dev, inbox->dma, slave, + vhcr->in_param, + MLX4_MAILBOX_SIZE, 1); + if (ret) { + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", + __func__, cmd->opcode); vhcr_cmd->status = CMD_STAT_INTERNAL_ERR; goto out_status; } @@ -1552,8 +1718,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, } if (err) { - mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", - vhcr->op, slave, vhcr->errno, err); + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", + vhcr->op, slave, vhcr->errno, err); vhcr_cmd->status = mlx4_errno_to_status(err); goto out_status; } @@ -1568,7 +1735,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, /* If we failed to write back the outbox after the *command was successfully executed, we must fail this * slave, as it is now in undefined state */ - mlx4_err(dev, "%s:Failed writing outbox\n", __func__); + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s:Failed writing outbox\n", __func__); goto out; } } @@ -1847,8 +2016,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && - (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) + (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) { + mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n", + slave, cmd, slave_state[slave].last_cmd); goto reset_slave; + } mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { @@ -1882,7 +2054,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, reset_slave: /* cleanup any slave resources */ - mlx4_delete_all_resources_for_slave(dev, slave); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, slave); + + if (cmd != MLX4_COMM_CMD_RESET) { + mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n", + slave, cmd); + /* Turn on internal error letting slave reset itself immeditaly, + * otherwise it might take till timeout on command is passed + */ + reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR); + } + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; @@ -1958,17 +2141,28 @@ void mlx4_master_comm_channel(struct work_struct *work) static int sync_toggles(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int wr_toggle; - int rd_toggle; + u32 wr_toggle; + u32 rd_toggle; unsigned long end; - wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31; - end = jiffies + msecs_to_jiffies(5000); + wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)); + if (wr_toggle == 0xffffffff) + end = jiffies + msecs_to_jiffies(30000); + else + end = jiffies + msecs_to_jiffies(5000); while (time_before(jiffies, end)) { - rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31; - if (rd_toggle == wr_toggle) { - priv->cmd.comm_toggle = rd_toggle; + rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); + if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { + /* PCI might be offline */ + msleep(100); + wr_toggle = swab32(readl(&priv->mfunc.comm-> + slave_write)); + continue; + } + + if (rd_toggle >> 31 == wr_toggle >> 31) { + priv->cmd.comm_toggle = rd_toggle >> 31; return 0; } @@ -1997,11 +2191,12 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_is_master(dev)) priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.comm_bar) + priv->fw.comm_base, MLX4_COMM_PAGESIZE); else priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE); if (!priv->mfunc.comm) { mlx4_err(dev, "Couldn't map communication vector\n"); @@ -2073,13 +2268,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_init_resource_tracker(dev)) goto err_thread; - err = mlx4_ARM_COMM_CHANNEL(dev); - if (err) { - mlx4_err(dev, " Failed to arm comm channel eq: %x\n", - err); - goto err_resource; - } - } else { err = sync_toggles(dev); if (err) { @@ -2089,8 +2277,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } return 0; -err_resource: - mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL); err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); @@ -2107,9 +2293,9 @@ err_comm_admin: err_comm: iounmap(priv->mfunc.comm); err_vhcr: - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, - priv->mfunc.vhcr_dma); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + priv->mfunc.vhcr, + priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; return -ENOMEM; } @@ -2120,7 +2306,6 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { - mutex_init(&priv->cmd.hcr_mutex); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2130,8 +2315,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!mlx4_is_slave(dev) && !priv->cmd.hcr) { - priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + - MLX4_HCR_BASE, MLX4_HCR_SIZE); + priv->cmd.hcr = ioremap(pci_resource_start(dev->persist->pdev, + 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); if (!priv->cmd.hcr) { mlx4_err(dev, "Couldn't map command register\n"); goto err; @@ -2140,7 +2325,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) { - priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr = dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); if (!priv->mfunc.vhcr) @@ -2150,7 +2336,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!priv->cmd.pool) { - priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, + priv->cmd.pool = pci_pool_create("mlx4_cmd", + dev->persist->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) @@ -2166,6 +2353,27 @@ err: return -ENOMEM; } +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int slave; + u32 slave_read; + + /* Report an internal error event to all + * communication channels. + */ + for (slave = 0; slave < dev->num_slaves; slave++) { + slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read)); + slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR; + __raw_writel((__force u32)cpu_to_be32(slave_read), + &priv->mfunc.comm[slave].slave_read); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + } +} + void mlx4_multi_func_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2181,6 +2389,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) kfree(priv->mfunc.master.slave_state); kfree(priv->mfunc.master.vf_admin); kfree(priv->mfunc.master.vf_oper); + dev->num_slaves = 0; } iounmap(priv->mfunc.comm); @@ -2202,7 +2411,7 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) } if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr && (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; } @@ -2229,6 +2438,11 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; + /* To support fatal error flow, initialize all + * cmd contexts to allow simulating completions + * with complete() at any time. + */ + init_completion(&priv->cmd.context[i].done); } priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; @@ -2306,8 +2520,9 @@ u32 mlx4_comm_get_version(void) static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) { - if ((vf < 0) || (vf >= dev->num_vfs)) { - mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", vf, dev->num_vfs); + if ((vf < 0) || (vf >= dev->persist->num_vfs)) { + mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", + vf, dev->persist->num_vfs); return -EINVAL; } @@ -2316,7 +2531,7 @@ static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) { - if (slave < 1 || slave > dev->num_vfs) { + if (slave < 1 || slave > dev->persist->num_vfs) { mlx4_err(dev, "Bad slave number:%d (number of activated slaves: %lu)\n", slave, dev->num_slaves); @@ -2325,6 +2540,25 @@ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) return slave - 1; } +void mlx4_cmd_wake_completions(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_context *context; + int i; + + spin_lock(&priv->cmd.context_lock); + if (priv->cmd.context) { + for (i = 0; i < priv->cmd.max_cmds; ++i) { + context = &priv->cmd.context[i]; + context->fw_status = CMD_STAT_INTERNAL_ERR; + context->result = + mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + complete(&context->done); + } + } + spin_unlock(&priv->cmd.context_lock); +} + struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave) { struct mlx4_active_ports actv_ports; @@ -2388,7 +2622,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev, if (port <= 0 || port > dev->caps.num_ports) return slaves_pport; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (test_bit(port - 1, actv_ports.ports)) @@ -2408,7 +2642,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX); - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (bitmap_equal(crit_ports->ports, actv_ports.ports, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 999014413b1a..90b5309cdb5c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -32,6 +32,7 @@ */ #include <linux/mlx4/device.h> +#include <linux/clocksource.h> #include "mlx4_en.h" @@ -147,12 +148,9 @@ static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev, ptp_clock_info); unsigned long flags; - s64 now; write_lock_irqsave(&mdev->clock_lock, flags); - now = timecounter_read(&mdev->clock); - now += delta; - timecounter_init(&mdev->clock, &mdev->cycles, now); + timecounter_adjtime(&mdev->clock, delta); write_unlock_irqrestore(&mdev->clock_lock, flags); return 0; @@ -243,7 +241,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) { struct mlx4_dev *dev = mdev->dev; unsigned long flags; - u64 ns; + u64 ns, zero = 0; rwlock_init(&mdev->clock_lock); @@ -268,7 +266,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) /* Calculate period in seconds to call the overflow watchdog - to make * sure counter is checked at least once every wrap around. */ - ns = cyclecounter_cyc2ns(&mdev->cycles, mdev->cycles.mask); + ns = cyclecounter_cyc2ns(&mdev->cycles, mdev->cycles.mask, zero, &zero); do_div(ns, NSEC_PER_SEC / 2 / HZ); mdev->overflow_period = ns; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 82322b1c8411..22da4d0d0f05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -70,10 +70,10 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, /* Allocate HW buffers on provided NUMA node. * dev->numa_node is used in mtt range allocation flow. */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, cq->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_cq; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 90e0f045a6bc..a7b58ba8492b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -92,7 +92,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) (u16) (mdev->dev->caps.fw_ver >> 32), (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff), (u16) (mdev->dev->caps.fw_ver & 0xffff)); - strlcpy(drvinfo->bus_info, pci_name(mdev->dev->pdev), + strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_stats = 0; drvinfo->regdump_len = 0; @@ -770,22 +770,20 @@ static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) return 0; } - proto_admin = cpu_to_be32(ptys_adv); - if (speed >= 0 && speed != priv->port_state.link_speed) - /* If speed was set then speed decides :-) */ - proto_admin = speed_set_ptys_admin(priv, speed, - ptys_reg.eth_proto_cap); + proto_admin = cmd->autoneg == AUTONEG_ENABLE ? + cpu_to_be32(ptys_adv) : + speed_set_ptys_admin(priv, speed, + ptys_reg.eth_proto_cap); proto_admin &= ptys_reg.eth_proto_cap; - - if (proto_admin == ptys_reg.eth_proto_admin) - return 0; /* Nothing to change */ - if (!proto_admin) { en_warn(priv, "Not supported link mode(s) requested, check supported link modes.\n"); return -EINVAL; /* nothing to change due to bad input */ } + if (proto_admin == ptys_reg.eth_proto_admin) + return 0; /* Nothing to change */ + en_dbg(DRV, priv, "mlx4_ACCESS_PTYS_REG SET: ptys_reg.eth_proto_admin = 0x%x\n", be32_to_cpu(proto_admin)); @@ -798,9 +796,9 @@ static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) return ret; } - en_warn(priv, "Port link mode changed, restarting port...\n"); mutex_lock(&priv->mdev->state_lock); if (priv->port_up) { + en_warn(priv, "Port link mode changed, restarting port...\n"); mlx4_en_stop_port(dev, 1); if (mlx4_en_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 9f16f754137b..58d5a07d0ff4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -214,6 +214,8 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) iounmap(mdev->uar_map); mlx4_uar_free(dev, &mdev->priv_uar); mlx4_pd_free(dev, mdev->priv_pdn); + if (mdev->nb.notifier_call) + unregister_netdevice_notifier(&mdev->nb); kfree(mdev); } @@ -241,8 +243,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev) spin_lock_init(&mdev->uar_lock); mdev->dev = dev; - mdev->dma_device = &(dev->pdev->dev); - mdev->pdev = dev->pdev; + mdev->dma_device = &dev->persist->pdev->dev; + mdev->pdev = dev->persist->pdev; mdev->device_up = false; mdev->LSO_support = !!(dev->caps.flags & (1 << 15)); @@ -298,6 +300,12 @@ static void *mlx4_en_add(struct mlx4_dev *dev) if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) mdev->pndev[i] = NULL; } + /* register notifier */ + mdev->nb.notifier_call = mlx4_en_netdev_event; + if (register_netdevice_notifier(&mdev->nb)) { + mdev->nb.notifier_call = NULL; + mlx4_err(mdev, "Failed to create notifier\n"); + } return mdev; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d0d6dc1b8e46..2a210c4efb89 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -475,7 +475,8 @@ static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *ad { int err; - if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN || + priv->mdev->dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) return 0; /* do nothing */ err = mlx4_tunnel_steer_add(priv->mdev->dev, addr, priv->port, qpn, @@ -2061,6 +2062,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev) /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; + mdev->upper[priv->port] = NULL; mutex_unlock(&mdev->state_lock); mlx4_en_free_resources(priv); @@ -2200,6 +2202,10 @@ static int mlx4_en_set_features(struct net_device *netdev, return ret; } + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_CTAG_TX)) + en_info(priv, "Turn %s TX vlan strip offload\n", + (features & NETIF_F_HW_VLAN_CTAG_TX) ? "ON" : "OFF"); + if (features & NETIF_F_LOOPBACK) priv->ctrl_flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); else @@ -2440,6 +2446,180 @@ static const struct net_device_ops mlx4_netdev_ops_master = { #endif }; +struct mlx4_en_bond { + struct work_struct work; + struct mlx4_en_priv *priv; + int is_bonded; + struct mlx4_port_map port_map; +}; + +static void mlx4_en_bond_work(struct work_struct *work) +{ + struct mlx4_en_bond *bond = container_of(work, + struct mlx4_en_bond, + work); + int err = 0; + struct mlx4_dev *dev = bond->priv->mdev->dev; + + if (bond->is_bonded) { + if (!mlx4_is_bonded(dev)) { + err = mlx4_bond(dev); + if (err) + en_err(bond->priv, "Fail to bond device\n"); + } + if (!err) { + err = mlx4_port_map_set(dev, &bond->port_map); + if (err) + en_err(bond->priv, "Fail to set port map [%d][%d]: %d\n", + bond->port_map.port1, + bond->port_map.port2, + err); + } + } else if (mlx4_is_bonded(dev)) { + err = mlx4_unbond(dev); + if (err) + en_err(bond->priv, "Fail to unbond device\n"); + } + dev_put(bond->priv->dev); + kfree(bond); +} + +static int mlx4_en_queue_bond_work(struct mlx4_en_priv *priv, int is_bonded, + u8 v2p_p1, u8 v2p_p2) +{ + struct mlx4_en_bond *bond = NULL; + + bond = kzalloc(sizeof(*bond), GFP_ATOMIC); + if (!bond) + return -ENOMEM; + + INIT_WORK(&bond->work, mlx4_en_bond_work); + bond->priv = priv; + bond->is_bonded = is_bonded; + bond->port_map.port1 = v2p_p1; + bond->port_map.port2 = v2p_p2; + dev_hold(priv->dev); + queue_work(priv->mdev->workqueue, &bond->work); + return 0; +} + +int mlx4_en_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + u8 port = 0; + struct mlx4_en_dev *mdev; + struct mlx4_dev *dev; + int i, num_eth_ports = 0; + bool do_bond = true; + struct mlx4_en_priv *priv; + u8 v2p_port1 = 0; + u8 v2p_port2 = 0; + + if (!net_eq(dev_net(ndev), &init_net)) + return NOTIFY_DONE; + + mdev = container_of(this, struct mlx4_en_dev, nb); + dev = mdev->dev; + + /* Go into this mode only when two network devices set on two ports + * of the same mlx4 device are slaves of the same bonding master + */ + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + ++num_eth_ports; + if (!port && (mdev->pndev[i] == ndev)) + port = i; + mdev->upper[i] = mdev->pndev[i] ? + netdev_master_upper_dev_get(mdev->pndev[i]) : NULL; + /* condition not met: network device is a slave */ + if (!mdev->upper[i]) + do_bond = false; + if (num_eth_ports < 2) + continue; + /* condition not met: same master */ + if (mdev->upper[i] != mdev->upper[i-1]) + do_bond = false; + } + /* condition not met: 2 salves */ + do_bond = (num_eth_ports == 2) ? do_bond : false; + + /* handle only events that come with enough info */ + if ((do_bond && (event != NETDEV_BONDING_INFO)) || !port) + return NOTIFY_DONE; + + priv = netdev_priv(ndev); + if (do_bond) { + struct netdev_notifier_bonding_info *notifier_info = ptr; + struct netdev_bonding_info *bonding_info = + ¬ifier_info->bonding_info; + + /* required mode 1, 2 or 4 */ + if ((bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) && + (bonding_info->master.bond_mode != BOND_MODE_XOR) && + (bonding_info->master.bond_mode != BOND_MODE_8023AD)) + do_bond = false; + + /* require exactly 2 slaves */ + if (bonding_info->master.num_slaves != 2) + do_bond = false; + + /* calc v2p */ + if (do_bond) { + if (bonding_info->master.bond_mode == + BOND_MODE_ACTIVEBACKUP) { + /* in active-backup mode virtual ports are + * mapped to the physical port of the active + * slave */ + if (bonding_info->slave.state == + BOND_STATE_BACKUP) { + if (port == 1) { + v2p_port1 = 2; + v2p_port2 = 2; + } else { + v2p_port1 = 1; + v2p_port2 = 1; + } + } else { /* BOND_STATE_ACTIVE */ + if (port == 1) { + v2p_port1 = 1; + v2p_port2 = 1; + } else { + v2p_port1 = 2; + v2p_port2 = 2; + } + } + } else { /* Active-Active */ + /* in active-active mode a virtual port is + * mapped to the native physical port if and only + * if the physical port is up */ + __s8 link = bonding_info->slave.link; + + if (port == 1) + v2p_port2 = 2; + else + v2p_port1 = 1; + if ((link == BOND_LINK_UP) || + (link == BOND_LINK_FAIL)) { + if (port == 1) + v2p_port1 = 1; + else + v2p_port2 = 2; + } else { /* BOND_LINK_DOWN || BOND_LINK_BACK */ + if (port == 1) + v2p_port1 = 2; + else + v2p_port2 = 1; + } + } + } + } + + mlx4_en_queue_bond_work(priv, do_bond, + v2p_port1, v2p_port2); + + return NOTIFY_DONE; +} + int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { @@ -2457,7 +2637,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_set_real_num_tx_queues(dev, prof->tx_ring_num); netif_set_real_num_rx_queues(dev, prof->rx_ring_num); - SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev); + SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev); dev->dev_port = port - 1; /* @@ -2622,6 +2802,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } mdev->pndev[port] = dev; + mdev->upper[port] = NULL; netif_carrier_off(dev); mlx4_en_set_default_moderation(priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index f1a5500ff72d..34f2fdf4fe5d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -50,10 +50,14 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->mtu_msgmax = 0xff; if (!is_tx && !rss) context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); - if (is_tx) + if (is_tx) { context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); - else + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP) + context->params2 |= MLX4_QP_BIT_FPP; + + } else { context->sq_size_stride = ilog2(TXBB_SIZE) - 4; + } context->usr_page = cpu_to_be32(mdev->priv_uar.index); context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a0474eb94aa3..698d60de1255 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -162,6 +162,10 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, if (mlx4_alloc_pages(priv, &ring->page_alloc[i], frag_info, GFP_KERNEL | __GFP_COLD)) goto out; + + en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n", + i, ring->page_alloc[i].page_size, + atomic_read(&ring->page_alloc[i].page->_count)); } return 0; @@ -387,10 +391,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring->rx_info, tmp); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_info; @@ -1059,8 +1063,9 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) (eff_mtu > buf_size + frag_sizes[i]) ? frag_sizes[i] : eff_mtu - buf_size; priv->frag_info[i].frag_prefix_size = buf_size; - priv->frag_info[i].frag_stride = ALIGN(frag_sizes[i], - SMP_CACHE_BYTES); + priv->frag_info[i].frag_stride = + ALIGN(priv->frag_info[i].frag_size, + SMP_CACHE_BYTES); buf_size += priv->frag_info[i].frag_size; i++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index e3357bf523df..55f9f5c5344e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -91,10 +91,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; @@ -682,8 +682,8 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, if (dev->num_tc) return skb_tx_hash(dev, skb); - if (vlan_tx_tag_present(skb)) - up = vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT; + if (skb_vlan_tag_present(skb)) + up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT; return fallback(dev, skb) % rings_p_up + up * rings_p_up; } @@ -742,8 +742,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } - if (vlan_tx_tag_present(skb)) - vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) + vlan_tag = skb_vlan_tag_get(skb); netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -930,7 +930,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) real_size = (real_size / 16) & 0x3f; if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && - !vlan_tx_tag_present(skb) && send_doorbell) { + !skb_vlan_tag_present(skb) && send_doorbell) { tx_desc->ctrl.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); @@ -952,7 +952,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) } else { tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * - !!vlan_tx_tag_present(skb); + !!skb_vlan_tag_present(skb); tx_desc->ctrl.fence_size = real_size; /* Ensure new descriptor hits memory diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 3d275fbaf0eb..264bc15c1ff2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -88,6 +88,8 @@ static u64 get_async_ev_mask(struct mlx4_dev *dev) u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT); + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT) + async_ev_mask |= (1ull << MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT); return async_ev_mask; } @@ -237,7 +239,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -255,7 +257,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -310,7 +312,7 @@ static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - for (i = 0; i < dev->num_vfs + 1; i++) + for (i = 0; i < dev->persist->num_vfs + 1; i++) if (test_bit(i, slaves_pport.slaves)) set_and_calc_slave_port_state(dev, i, port, event, &gen_event); @@ -429,8 +431,14 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) { mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n", i); - - mlx4_delete_all_resources_for_slave(dev, i); + /* In case of 'Reset flow' FLR can be generated for + * a slave before mlx4_load_one is done. + * make sure interface is up before trying to delete + * slave resources which weren't allocated yet. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, i); /*return the slave to running mode*/ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; @@ -560,7 +568,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_priv(dev)->sense.do_sense_port[port] = 1; if (!mlx4_is_master(dev)) break; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { @@ -596,7 +605,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!mlx4_is_master(dev)) break; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; + i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (i == mlx4_master_func_num(dev)) @@ -727,6 +738,26 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) (unsigned long) eqe); break; + case MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT: + switch (eqe->subtype) { + case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE: + mlx4_warn(dev, "Bad cable detected on port %u\n", + eqe->event.bad_cable.port); + break; + case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE: + mlx4_warn(dev, "Unsupported cable detected\n"); + break; + default: + mlx4_dbg(dev, + "Unhandled recoverable error event detected: %02x(%02x) on EQ %d at index %u. owner=%x, nent=0x%x, ownership=%s\n", + eqe->type, eqe->subtype, eq->eqn, + eq->cons_index, eqe->owner, eq->nent, + !!(eqe->owner & 0x80) ^ + !!(eq->cons_index & eq->nent) ? "HW" : "SW"); + break; + } + break; + case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: case MLX4_EVENT_TYPE_ECC_DETECT: default: @@ -837,12 +868,10 @@ static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, MLX4_CMD_WRAPPED); } -static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, - int eq_num) +static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, int eq_num) { - return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, - 0, MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + return mlx4_cmd(dev, 0, eq_num, 1, MLX4_CMD_HW2SW_EQ, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } static int mlx4_num_eq_uar(struct mlx4_dev *dev) @@ -865,7 +894,7 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!priv->eq_table.uar_map[index]) { priv->eq_table.uar_map[index] = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + ((eq->eqn / 4) << PAGE_SHIFT), PAGE_SIZE); if (!priv->eq_table.uar_map[index]) { @@ -928,8 +957,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { - eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, - PAGE_SIZE, &t, GFP_KERNEL); + eq->page_list[i].buf = dma_alloc_coherent(&dev->persist-> + pdev->dev, + PAGE_SIZE, &t, + GFP_KERNEL); if (!eq->page_list[i].buf) goto err_out_free_pages; @@ -995,7 +1026,7 @@ err_out_free_eq: err_out_free_pages: for (i = 0; i < npages; ++i) if (eq->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, eq->page_list[i].buf, eq->page_list[i].map); @@ -1013,7 +1044,6 @@ static void mlx4_free_eq(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_cmd_mailbox *mailbox; int err; int i; /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with @@ -1021,36 +1051,21 @@ static void mlx4_free_eq(struct mlx4_dev *dev, */ int npages = PAGE_ALIGN(dev->caps.eqe_size * eq->nent) / PAGE_SIZE; - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) - return; - - err = mlx4_HW2SW_EQ(dev, mailbox, eq->eqn); + err = mlx4_HW2SW_EQ(dev, eq->eqn); if (err) mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err); - if (0) { - mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn); - for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) { - if (i % 4 == 0) - pr_cont("[%02x] ", i * 4); - pr_cont(" %08x", be32_to_cpup(mailbox->buf + i * 4)); - if ((i + 1) % 4 == 0) - pr_cont("\n"); - } - } synchronize_irq(eq->irq); tasklet_disable(&eq->tasklet_ctx.task); mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - eq->page_list[i].buf, - eq->page_list[i].map); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + eq->page_list[i].map); kfree(eq->page_list); mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR); - mlx4_free_cmd_mailbox(dev, mailbox); } static void mlx4_free_irqs(struct mlx4_dev *dev) @@ -1060,7 +1075,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) int i, vec; if (eq_table->have_irq) - free_irq(dev->pdev->irq, dev); + free_irq(dev->persist->pdev->irq, dev); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) { @@ -1089,7 +1104,8 @@ static int mlx4_map_clr_int(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) + + priv->clr_base = ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clr_int_bar) + priv->fw.clr_int_base, MLX4_CLR_INT_SIZE); if (!priv->clr_base) { mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n"); @@ -1212,13 +1228,13 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-comp-%d@pci:%s", i, - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } else { snprintf(priv->eq_table.irq_names + i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-async@pci:%s", - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } eq_name = priv->eq_table.irq_names + @@ -1235,8 +1251,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) snprintf(priv->eq_table.irq_names, MLX4_IRQNAME_SIZE, DRV_NAME "@pci:%s", - pci_name(dev->pdev)); - err = request_irq(dev->pdev->irq, mlx4_interrupt, + pci_name(dev->persist->pdev)); + err = request_irq(dev->persist->pdev->irq, mlx4_interrupt, IRQF_SHARED, priv->eq_table.irq_names, dev); if (err) goto err_out_async; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 982861d1df44..5a21e5dc94cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -84,13 +84,10 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [ 1] = "UC transport", [ 2] = "UD transport", [ 3] = "XRC transport", - [ 4] = "reliable multicast", - [ 5] = "FCoIB support", [ 6] = "SRQ support", [ 7] = "IPoIB checksum offload", [ 8] = "P_Key violation counter", [ 9] = "Q_Key violation counter", - [10] = "VMM", [12] = "Dual Port Different Protocol (DPDP) support", [15] = "Big LSO headers", [16] = "MW support", @@ -99,12 +96,11 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [19] = "Raw multicast support", [20] = "Address vector port checking support", [21] = "UD multicast support", - [24] = "Demand paging support", - [25] = "Router support", [30] = "IBoE support", [32] = "Unicast loopback support", [34] = "FCS header control", - [38] = "Wake On LAN support", + [37] = "Wake On LAN (port1) support", + [38] = "Wake On LAN (port2) support", [40] = "UDP RSS support", [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", @@ -145,7 +141,9 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [16] = "CONFIG DEV support", [17] = "Asymmetric EQs support", [18] = "More than 80 VFs support", - [19] = "Performance optimized for limited rule configuration flow steering support" + [19] = "Performance optimized for limited rule configuration flow steering support", + [20] = "Recoverable error events support", + [21] = "Port Remap support" }; int i; @@ -259,6 +257,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 +#define QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET 0x48 #define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x50 #define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x54 @@ -273,6 +272,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 #define QUERY_FUNC_CAP_FLAG_ETH 0x80 #define QUERY_FUNC_CAP_FLAG_QUOTAS 0x10 +#define QUERY_FUNC_CAP_FLAG_RESD_LKEY 0x08 #define QUERY_FUNC_CAP_FLAG_VALID_MAILBOX 0x04 #define QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG (1UL << 31) @@ -344,9 +344,12 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); - /* enable rdma and ethernet interfaces, and new quota locations */ + /* enable rdma and ethernet interfaces, new quota locations, + * and reserved lkey + */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA | - QUERY_FUNC_CAP_FLAG_QUOTAS | QUERY_FUNC_CAP_FLAG_VALID_MAILBOX); + QUERY_FUNC_CAP_FLAG_QUOTAS | QUERY_FUNC_CAP_FLAG_VALID_MAILBOX | + QUERY_FUNC_CAP_FLAG_RESD_LKEY); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); field = min( @@ -411,6 +414,9 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG | QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET); + + size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); } else err = -EINVAL; @@ -503,6 +509,13 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); func_cap->reserved_eq = size & 0xFFFFFF; + if (func_cap->flags & QUERY_FUNC_CAP_FLAG_RESD_LKEY) { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); + func_cap->reserved_lkey = size; + } else { + func_cap->reserved_lkey = 0; + } + func_cap->extra_flags = 0; /* Mailbox data from 0x6c and onward should only be treated if @@ -851,6 +864,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; @@ -859,6 +874,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET); if (field32 & (1 << 0)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; + if (field32 & (1 << 7)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & 1<<6) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; @@ -1106,9 +1123,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); - /* For guests, disable mw type 2 */ + /* For guests, disable mw type 2 and port remap*/ MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; + bmme_flags &= ~MLX4_FLAG_PORT_REMAP; MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); /* turn off device-managed steering capability if not enabled */ @@ -1562,6 +1580,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_VXLAN_OFFSET 0x0c #define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e #define INIT_HCA_FLAGS_OFFSET 0x014 +#define INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET 0x018 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) #define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) @@ -1668,6 +1687,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; } + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT) + *(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1 << 31); + /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); @@ -1752,8 +1774,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) MLX4_PUT(inbox, parser_params, INIT_HCA_VXLAN_OFFSET); } - err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000, - MLX4_CMD_NATIVE); + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, + MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (err) mlx4_err(dev, "INIT_HCA returns %d\n", err); @@ -1879,6 +1901,36 @@ out: return err; } +static int mlx4_hca_core_clock_update(struct mlx4_dev *dev) +{ + struct mlx4_cmd_mailbox *mailbox; + __be32 *outbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + mlx4_warn(dev, "hca_core_clock mailbox allocation failed\n"); + return PTR_ERR(mailbox); + } + outbox = mailbox->buf; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, + MLX4_CMD_QUERY_HCA, + MLX4_CMD_TIME_CLASS_B, + !mlx4_is_slave(dev)); + if (err) { + mlx4_warn(dev, "hca_core_clock update failed\n"); + goto out; + } + + MLX4_GET(dev->caps.hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} + /* for IB-type ports only in SRIOV mode. Checks that both proxy QP0 * and real QP0 are active, so that the paravirtualized QP0 is ready * to operate */ @@ -1983,6 +2035,9 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port) err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!err) + mlx4_hca_core_clock_update(dev); + return err; } EXPORT_SYMBOL_GPL(mlx4_INIT_PORT); @@ -2007,7 +2062,7 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { if (priv->mfunc.master.init_port_ref[port] == 1) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, - 1000, MLX4_CMD_NATIVE); + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; } @@ -2018,7 +2073,7 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, if (!priv->mfunc.master.qp0_state[port].qp0_active && priv->mfunc.master.qp0_state[port].port_active) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, - 1000, MLX4_CMD_NATIVE); + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); @@ -2033,15 +2088,15 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port) { - return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, - MLX4_CMD_WRAPPED); + return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT); int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic) { - return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000, - MLX4_CMD_NATIVE); + return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, + MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } struct mlx4_config_dev { @@ -2049,13 +2104,16 @@ struct mlx4_config_dev { __be32 rsvd1[3]; __be16 vxlan_udp_dport; __be16 rsvd2; - __be32 rsvd3[27]; - __be16 rsvd4; - u8 rsvd5; + __be32 rsvd3; + __be32 roce_flags; + __be32 rsvd4[25]; + __be16 rsvd5; + u8 rsvd6; u8 rx_checksum_val; }; #define MLX4_VXLAN_UDP_DPORT (1 << 0) +#define MLX4_DISABLE_RX_PORT BIT(18) static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) { @@ -2111,7 +2169,7 @@ static const u8 config_dev_csum_flags[] = { int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params) { - struct mlx4_config_dev config_dev; + struct mlx4_config_dev config_dev = {0}; int err; u8 csum_mask; @@ -2158,6 +2216,45 @@ int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port) } EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port); +#define CONFIG_DISABLE_RX_PORT BIT(15) +int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis) +{ + struct mlx4_config_dev config_dev; + + memset(&config_dev, 0, sizeof(config_dev)); + config_dev.update_flags = cpu_to_be32(MLX4_DISABLE_RX_PORT); + if (dis) + config_dev.roce_flags = + cpu_to_be32(CONFIG_DISABLE_RX_PORT); + + return mlx4_CONFIG_DEV_set(dev, &config_dev); +} + +int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2) +{ + struct mlx4_cmd_mailbox *mailbox; + struct { + __be32 v_port1; + __be32 v_port2; + } *v2p; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + + v2p = mailbox->buf; + v2p->v_port1 = cpu_to_be32(port1); + v2p->v_port2 = cpu_to_be32(port2); + + err = mlx4_cmd(dev, mailbox->dma, 0, + MLX4_SET_PORT_VIRT2PHY, MLX4_CMD_VIRT_PORT_MAP, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) { @@ -2180,7 +2277,8 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) int mlx4_NOP(struct mlx4_dev *dev) { /* Input modifier of 0x1f means "finish as soon as possible." */ - return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100, MLX4_CMD_NATIVE); + return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); } int mlx4_get_phys_port_id(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 62562b60fa87..f44f7f6017ed 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -147,6 +147,7 @@ struct mlx4_func_cap { u32 qp0_proxy_qpn; u32 qp1_tunnel_qpn; u32 qp1_proxy_qpn; + u32 reserved_lkey; u8 physical_port; u8 port_flags; u8 flags1; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 97c9b1db1d27..2a9dd460a95f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -56,7 +56,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu int i; if (chunk->nsg > 0) - pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, + pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) @@ -69,7 +69,8 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk * int i; for (i = 0; i < chunk->npages; ++i) - dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, + dma_free_coherent(&dev->persist->pdev->dev, + chunk->mem[i].length, lowmem_page_address(sg_page(&chunk->mem[i])), sg_dma_address(&chunk->mem[i])); } @@ -173,7 +174,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, --cur_order; if (coherent) - ret = mlx4_alloc_icm_coherent(&dev->pdev->dev, + ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev, &chunk->mem[chunk->npages], cur_order, gfp_mask); else @@ -193,7 +194,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (coherent) ++chunk->nsg; else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { - chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); @@ -208,7 +209,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, } if (!coherent && chunk) { - chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 116895ac8b35..6fce58718837 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -33,11 +33,13 @@ #include <linux/slab.h> #include <linux/export.h> +#include <linux/errno.h> #include "mlx4.h" struct mlx4_device_context { struct list_head list; + struct list_head bond_list; struct mlx4_interface *intf; void *context; }; @@ -115,6 +117,58 @@ void mlx4_unregister_interface(struct mlx4_interface *intf) } EXPORT_SYMBOL_GPL(mlx4_unregister_interface); +int mlx4_do_bond(struct mlx4_dev *dev, bool enable) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx; + unsigned long flags; + int ret; + LIST_HEAD(bond_list); + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) + return -ENOTSUPP; + + ret = mlx4_disable_rx_port_check(dev, enable); + if (ret) { + mlx4_err(dev, "Fail to %s rx port check\n", + enable ? "enable" : "disable"); + return ret; + } + if (enable) { + dev->flags |= MLX4_FLAG_BONDED; + } else { + ret = mlx4_virt2phy_port_map(dev, 1, 2); + if (ret) { + mlx4_err(dev, "Fail to reset port map\n"); + return ret; + } + dev->flags &= ~MLX4_FLAG_BONDED; + } + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_for_each_entry_safe(dev_ctx, temp_dev_ctx, &priv->ctx_list, list) { + if (dev_ctx->intf->flags & MLX4_INTFF_BONDING) { + list_add_tail(&dev_ctx->bond_list, &bond_list); + list_del(&dev_ctx->list); + } + } + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &bond_list, bond_list) { + dev_ctx->intf->remove(dev, dev_ctx->context); + dev_ctx->context = dev_ctx->intf->add(dev); + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + mlx4_dbg(dev, "Inrerface for protocol %d restarted with when bonded mode is %s\n", + dev_ctx->intf->protocol, enable ? + "enabled" : "disabled"); + } + return 0; +} + void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, unsigned long param) { @@ -138,13 +192,13 @@ int mlx4_register_device(struct mlx4_dev *dev) mutex_lock(&intf_mutex); + dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP; list_add_tail(&priv->dev_list, &dev_list); list_for_each_entry(intf, &intf_list, list) mlx4_add_device(intf, priv); mutex_unlock(&intf_mutex); - if (!mlx4_is_slave(dev)) - mlx4_start_catas_poll(dev); + mlx4_start_catas_poll(dev); return 0; } @@ -154,14 +208,14 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; - if (!mlx4_is_slave(dev)) - mlx4_stop_catas_poll(dev); + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) mlx4_remove_device(intf, priv); list_del(&priv->dev_list); + dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP; mutex_unlock(&intf_mutex); } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 03e9eb0dc761..7e487223489a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -108,6 +108,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe, MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ MLX4_FUNC_CAP_DMFS_A0_STATIC) +#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV) + static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -249,7 +251,8 @@ static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev) if (mlx4_is_master(dev)) dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE; } else { - mlx4_dbg(dev, "Disabling CQE stride cacheLine unsupported\n"); + if (cache_line_size() != 32 && cache_line_size() != 64) + mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n"); dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE; dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE; } @@ -318,10 +321,11 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) return -ENODEV; } - if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { + if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) { mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev_cap->uar_size, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); return -ENODEV; } @@ -541,8 +545,10 @@ static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev, *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; - err1 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP, &lnkcap1); - err2 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP2, &lnkcap2); + err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP, + &lnkcap1); + err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2, + &lnkcap2); if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */ if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) *speed = PCIE_SPEED_8_0GT; @@ -587,7 +593,7 @@ static void mlx4_check_pcie_caps(struct mlx4_dev *dev) return; } - err = pcie_get_minimum_link(dev->pdev, &speed, &width); + err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width); if (err || speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) { mlx4_warn(dev, @@ -792,6 +798,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; dev->caps.num_eqs = func_cap.max_eq; dev->caps.reserved_eqs = func_cap.reserved_eq; + dev->caps.reserved_lkey = func_cap.reserved_lkey; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; @@ -837,10 +844,12 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > - pci_resource_len(dev->pdev, 2)) { + pci_resource_len(dev->persist->pdev, + 2)) { mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev->caps.uar_page_size * dev->caps.num_uars, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); goto err_mem; } @@ -1152,6 +1161,91 @@ err_set_port: return err ? err : count; } +int mlx4_bond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (!mlx4_is_bonded(dev)) + ret = mlx4_do_bond(dev, true); + else + ret = 0; + + mutex_unlock(&priv->bond_mutex); + if (ret) + mlx4_err(dev, "Failed to bond device: %d\n", ret); + else + mlx4_dbg(dev, "Device is bonded\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_bond); + +int mlx4_unbond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (mlx4_is_bonded(dev)) + ret = mlx4_do_bond(dev, false); + + mutex_unlock(&priv->bond_mutex); + if (ret) + mlx4_err(dev, "Failed to unbond device: %d\n", ret); + else + mlx4_dbg(dev, "Device is unbonded\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_unbond); + + +int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) +{ + u8 port1 = v2p->port1; + u8 port2 = v2p->port2; + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) + return -ENOTSUPP; + + mutex_lock(&priv->bond_mutex); + + /* zero means keep current mapping for this port */ + if (port1 == 0) + port1 = priv->v2p.port1; + if (port2 == 0) + port2 = priv->v2p.port2; + + if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) || + (port2 < 1) || (port2 > MLX4_MAX_PORTS) || + (port1 == 2 && port2 == 1)) { + /* besides boundary checks cross mapping makes + * no sense and therefore not allowed */ + err = -EINVAL; + } else if ((port1 == priv->v2p.port1) && + (port2 == priv->v2p.port2)) { + err = 0; + } else { + err = mlx4_virt2phy_port_map(dev, port1, port2); + if (!err) { + mlx4_dbg(dev, "port map changed: [%d][%d]\n", + port1, port2); + priv->v2p.port1 = port1; + priv->v2p.port2 = port2; + } else { + mlx4_err(dev, "Failed to change port mape: %d\n", err); + } + } + + mutex_unlock(&priv->bond_mutex); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_port_map_set); + static int mlx4_load_fw(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1477,7 +1571,8 @@ static void mlx4_slave_exit(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->cmd.slave_cmd_mutex); - if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, + MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function\n"); mutex_unlock(&priv->cmd.slave_cmd_mutex); } @@ -1492,9 +1587,9 @@ static int map_bf_area(struct mlx4_dev *dev) if (!dev->caps.bf_reg_size) return -ENXIO; - bf_start = pci_resource_start(dev->pdev, 2) + + bf_start = pci_resource_start(dev->persist->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT); - bf_len = pci_resource_len(dev->pdev, 2) - + bf_len = pci_resource_len(dev->persist->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT); priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); if (!priv->bf_mapping) @@ -1536,7 +1631,8 @@ static int map_internal_clock(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); priv->clock_mapping = - ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clock_bar) + priv->fw.clock_offset, MLX4_CLOCK_SIZE); if (!priv->clock_mapping) @@ -1573,6 +1669,50 @@ static void mlx4_close_fw(struct mlx4_dev *dev) } } +static int mlx4_comm_check_offline(struct mlx4_dev *dev) +{ +#define COMM_CHAN_OFFLINE_OFFSET 0x09 + + u32 comm_flags; + u32 offline_bit; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + offline_bit = (comm_flags & + (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); + if (!offline_bit) + return 0; + /* There are cases as part of AER/Reset flow that PF needs + * around 100 msec to load. We therefore sleep for 100 msec + * to allow other tasks to make use of that CPU during this + * time interval. + */ + msleep(100); + } + mlx4_err(dev, "Communication channel is offline.\n"); + return -EIO; +} + +static void mlx4_reset_vf_support(struct mlx4_dev *dev) +{ +#define COMM_CHAN_RST_OFFSET 0x1e + + struct mlx4_priv *priv = mlx4_priv(dev); + u32 comm_rst; + u32 comm_caps; + + comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_CAPS)); + comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET)); + + if (comm_rst) + dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET; +} + static int mlx4_init_slave(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1588,9 +1728,15 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; + if (mlx4_comm_check_offline(dev)) { + mlx4_err(dev, "PF is not responsive, skipping initialization\n"); + goto err_offline; + } + + mlx4_reset_vf_support(dev); mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, - MLX4_COMM_TIME); + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME); /* if we are in the middle of flr the slave will try * NUM_OF_RESET_RETRIES times before leaving.*/ if (ret_from_reset) { @@ -1615,22 +1761,24 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mlx4_warn(dev, "Sending vhcr0\n"); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; - if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: - mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); + mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0); +err_offline: mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } @@ -1705,7 +1853,8 @@ static void choose_steering_mode(struct mlx4_dev *dev, if (mlx4_log_num_mgm_entry_size <= 0 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || - (dev_cap->fs_max_num_qp_per_entry >= (dev->num_vfs + 1))) && + (dev_cap->fs_max_num_qp_per_entry >= + (dev->persist->num_vfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = @@ -1744,8 +1893,7 @@ static void choose_tunnel_offload_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && - dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS && - dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC) + dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN; else dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE; @@ -2288,7 +2436,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) for (i = 0; i < nreq; ++i) entries[i].entry = i; - nreq = pci_enable_msix_range(dev->pdev, entries, 2, nreq); + nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2, + nreq); if (nreq < 0) { kfree(entries); @@ -2316,7 +2465,7 @@ no_msi: dev->caps.comp_pool = 0; for (i = 0; i < 2; ++i) - priv->eq_table.eq[i].irq = dev->pdev->irq; + priv->eq_table.eq[i].irq = dev->persist->pdev->irq; } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) @@ -2344,7 +2493,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_attr.show = show_port_type; sysfs_attr_init(&info->port_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_attr); + err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); info->port = -1; @@ -2361,10 +2510,12 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_mtu_attr.show = show_port_ib_mtu; sysfs_attr_init(&info->port_mtu_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); + err = device_create_file(&dev->persist->pdev->dev, + &info->port_mtu_attr); if (err) { mlx4_err(dev, "Failed to create mtu file for port %d\n", port); - device_remove_file(&info->dev->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_attr); info->port = -1; } @@ -2376,8 +2527,9 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) if (info->port < 0) return; - device_remove_file(&info->dev->pdev->dev, &info->port_attr); - device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); + device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) @@ -2444,10 +2596,11 @@ static int mlx4_get_ownership(struct mlx4_dev *dev) void __iomem *owner; u32 ret; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2463,10 +2616,11 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2481,11 +2635,19 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) !!((flags) & MLX4_FLAG_MASTER)) static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, - u8 total_vfs, int existing_vfs) + u8 total_vfs, int existing_vfs, int reset_flow) { u64 dev_flags = dev->flags; int err = 0; + if (reset_flow) { + dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (!dev->dev_vfs) + goto free_mem; + return dev_flags; + } + atomic_inc(&pf_loading); if (dev->flags & MLX4_FLAG_SRIOV) { if (existing_vfs != total_vfs) { @@ -2514,13 +2676,14 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, dev_flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; dev_flags &= ~MLX4_FLAG_SLAVE; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } return dev_flags; disable_sriov: atomic_dec(&pf_loading); - dev->num_vfs = 0; +free_mem: + dev->persist->num_vfs = 0; kfree(dev->dev_vfs); return dev_flags & ~MLX4_FLAG_MASTER; } @@ -2544,7 +2707,8 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap } static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, - int total_vfs, int *nvfs, struct mlx4_priv *priv) + int total_vfs, int *nvfs, struct mlx4_priv *priv, + int reset_flow) { struct mlx4_dev *dev; unsigned sum = 0; @@ -2560,6 +2724,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, spin_lock_init(&priv->ctx_lock); mutex_init(&priv->port_mutex); + mutex_init(&priv->bond_mutex); INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); @@ -2607,10 +2772,15 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, existing_vfs = pci_num_vf(pdev); if (existing_vfs) dev->flags |= MLX4_FLAG_SRIOV; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } } + /* on load remove any previous indication of internal error, + * device is up. + */ + dev->persist->state = MLX4_DEVICE_STATE_UP; + slave_start: err = mlx4_cmd_init(dev); if (err) { @@ -2661,8 +2831,10 @@ slave_start: goto err_fw; if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, - existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, + total_vfs, + existing_vfs, + reset_flow); mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); dev->flags = dev_flags; @@ -2704,7 +2876,7 @@ slave_start: if (dev->flags & MLX4_FLAG_SRIOV) { if (!existing_vfs) pci_disable_sriov(pdev); - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev) && !reset_flow) atomic_dec(&pf_loading); dev->flags &= ~MLX4_FLAG_SRIOV; } @@ -2718,7 +2890,8 @@ slave_start: } if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs, reset_flow); if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); @@ -2771,12 +2944,14 @@ slave_start: dev->caps.num_ports); goto err_close; } - memcpy(dev->nvfs, nvfs, sizeof(dev->nvfs)); + memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs)); - for (i = 0; i < sizeof(dev->nvfs)/sizeof(dev->nvfs[0]); i++) { + for (i = 0; + i < sizeof(dev->persist->nvfs)/ + sizeof(dev->persist->nvfs[0]); i++) { unsigned j; - for (j = 0; j < dev->nvfs[i]; ++sum, ++j) { + for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) { dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1; dev->dev_vfs[sum].n_ports = i < 2 ? 1 : dev->caps.num_ports; @@ -2828,6 +3003,17 @@ slave_start: goto err_steer; mlx4_init_quotas(dev); + /* When PF resources are ready arm its comm channel to enable + * getting commands + */ + if (mlx4_is_master(dev)) { + err = mlx4_ARM_COMM_CHANNEL(dev); + if (err) { + mlx4_err(dev, " Failed to arm comm channel eq: %x\n", + err); + goto err_steer; + } + } for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); @@ -2835,6 +3021,9 @@ slave_start: goto err_port; } + priv->v2p.port1 = 1; + priv->v2p.port2 = 2; + err = mlx4_register_device(dev); if (err) goto err_port; @@ -2846,7 +3035,7 @@ slave_start: priv->removed = 0; - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(dev_cap); @@ -2880,8 +3069,10 @@ err_free_eq: mlx4_free_eq_table(dev); err_master_mfunc: - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev)) { + mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); mlx4_multi_func_cleanup(dev); + } if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_qkey); @@ -2905,10 +3096,12 @@ err_cmd: mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); err_sriov: - if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) + if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) { pci_disable_sriov(pdev); + dev->flags &= ~MLX4_FLAG_SRIOV; + } - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(priv->dev.dev_vfs); @@ -3049,11 +3242,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, } } - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_catas_init(&priv->dev); if (err) goto err_release_regions; + + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0); + if (err) + goto err_catas; + return 0; +err_catas: + mlx4_catas_end(&priv->dev); + err_release_regions: pci_release_regions(pdev); @@ -3076,38 +3277,60 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; dev = &priv->dev; - dev->pdev = pdev; - pci_set_drvdata(pdev, dev); + dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL); + if (!dev->persist) { + kfree(priv); + return -ENOMEM; + } + dev->persist->pdev = pdev; + dev->persist->dev = dev; + pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; + mutex_init(&dev->persist->device_state_mutex); + mutex_init(&dev->persist->interface_state_mutex); ret = __mlx4_init_one(pdev, id->driver_data, priv); - if (ret) + if (ret) { + kfree(dev->persist); kfree(priv); + } else { + pci_save_state(pdev); + } return ret; } +static void mlx4_clean_dev(struct mlx4_dev *dev) +{ + struct mlx4_dev_persistent *persist = dev->persist; + struct mlx4_priv *priv = mlx4_priv(dev); + unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS); + + memset(priv, 0, sizeof(*priv)); + priv->dev.persist = persist; + priv->dev.flags = flags; +} + static void mlx4_unload_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; - int p; - int active_vfs = 0; + int p, i; if (priv->removed) return; + /* saving current ports type for further use */ + for (i = 0; i < dev->caps.num_ports; i++) { + dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1]; + dev->persist->curr_port_poss_type[i] = dev->caps. + possible_type[i + 1]; + } + pci_dev_data = priv->pci_dev_data; - /* Disabling SR-IOV is not allowed while there are active vf's */ - if (mlx4_is_master(dev)) { - active_vfs = mlx4_how_many_lives_vf(dev); - if (active_vfs) { - pr_warn("Removing PF when there are active VF's !!\n"); - pr_warn("Will not disable SR-IOV.\n"); - } - } mlx4_stop_sense(dev); mlx4_unregister_device(dev); @@ -3151,12 +3374,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); - if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { - mlx4_warn(dev, "Disabling SR-IOV\n"); - pci_disable_sriov(pdev); - dev->flags &= ~MLX4_FLAG_SRIOV; - dev->num_vfs = 0; - } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); @@ -3168,42 +3385,96 @@ static void mlx4_unload_one(struct pci_dev *pdev) kfree(dev->caps.qp1_proxy); kfree(dev->dev_vfs); - memset(priv, 0, sizeof(*priv)); + mlx4_clean_dev(dev); priv->pci_dev_data = pci_dev_data; priv->removed = 1; } static void mlx4_remove_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); + int active_vfs = 0; + + mutex_lock(&persist->interface_state_mutex); + persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; + mutex_unlock(&persist->interface_state_mutex); + + /* Disabling SR-IOV is not allowed while there are active vf's */ + if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) { + active_vfs = mlx4_how_many_lives_vf(dev); + if (active_vfs) { + pr_warn("Removing PF when there are active VF's !!\n"); + pr_warn("Will not disable SR-IOV.\n"); + } + } + + /* device marked to be under deletion running now without the lock + * letting other tasks to be terminated + */ + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + else + mlx4_info(dev, "%s: interface is down\n", __func__); + mlx4_catas_end(dev); + if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { + mlx4_warn(dev, "Disabling SR-IOV\n"); + pci_disable_sriov(pdev); + } - mlx4_unload_one(pdev); pci_release_regions(pdev); pci_disable_device(pdev); + kfree(dev->persist); kfree(priv); pci_set_drvdata(pdev, NULL); } +static int restore_current_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *types, + enum mlx4_port_type *poss_types) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err, i; + + mlx4_stop_sense(dev); + + mutex_lock(&priv->port_mutex); + for (i = 0; i < dev->caps.num_ports; i++) + dev->caps.possible_type[i + 1] = poss_types[i]; + err = mlx4_change_port_types(dev, types); + mlx4_start_sense(dev); + mutex_unlock(&priv->port_mutex); + + return err; +} + int mlx4_restart_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; int pci_dev_data, err, total_vfs; pci_dev_data = priv->pci_dev_data; - total_vfs = dev->num_vfs; - memcpy(nvfs, dev->nvfs, sizeof(dev->nvfs)); + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); mlx4_unload_one(pdev); - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1); if (err) { mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n", __func__, pci_name(pdev), err); return err; } + err = restore_current_port_types(dev, dev->persist->curr_port_type, + dev->persist->curr_port_poss_type); + if (err) + mlx4_err(dev, "could not restore original port types (%d)\n", + err); + return err; } @@ -3258,23 +3529,79 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table); static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { - mlx4_unload_one(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + + mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n"); + mlx4_enter_error_state(persist); - return state == pci_channel_io_perm_failure ? - PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + + mutex_unlock(&persist->interface_state_mutex); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + pci_disable_device(pdev); + return PCI_ERS_RESULT_NEED_RESET; } static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int ret; + int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + int total_vfs; - ret = __mlx4_init_one(pdev, priv->pci_dev_data, priv); + mlx4_err(dev, "mlx4_pci_slot_reset was called\n"); + ret = pci_enable_device(pdev); + if (ret) { + mlx4_err(dev, "Can not re-enable device, ret=%d\n", ret); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); + + mutex_lock(&persist->interface_state_mutex); + if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { + ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, + priv, 1); + if (ret) { + mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n", + __func__, ret); + goto end; + } + + ret = restore_current_port_types(dev, dev->persist-> + curr_port_type, dev->persist-> + curr_port_poss_type); + if (ret) + mlx4_err(dev, "could not restore original port types (%d)\n", ret); + } +end: + mutex_unlock(&persist->interface_state_mutex); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } +static void mlx4_shutdown(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + + mlx4_info(persist->dev, "mlx4_shutdown was called\n"); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + mutex_unlock(&persist->interface_state_mutex); +} + static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, @@ -3284,7 +3611,7 @@ static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, - .shutdown = mlx4_unload_one, + .shutdown = mlx4_shutdown, .remove = mlx4_remove_one, .err_handler = &mlx4_err_handler, }; @@ -3336,7 +3663,6 @@ static int __init mlx4_init(void) if (mlx4_verify_params()) return -EINVAL; - mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index a3867e7ef885..bd9ea0d01aae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1318,6 +1318,9 @@ out: mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + /* In case device is under an error, return success as a closing command */ + err = 0; return err; } @@ -1347,6 +1350,9 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && !attach && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = 0; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index bdd4eea2247c..1409d0cd6143 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -85,7 +85,9 @@ enum { MLX4_CLR_INT_SIZE = 0x00008, MLX4_SLAVE_COMM_BASE = 0x0, MLX4_COMM_PAGESIZE = 0x1000, - MLX4_CLOCK_SIZE = 0x00008 + MLX4_CLOCK_SIZE = 0x00008, + MLX4_COMM_CHAN_CAPS = 0x8, + MLX4_COMM_CHAN_FLAGS = 0xc }; enum { @@ -120,6 +122,10 @@ enum mlx4_mpt_state { }; #define MLX4_COMM_TIME 10000 +#define MLX4_COMM_OFFLINE_TIME_OUT 30000 +#define MLX4_COMM_CMD_NA_OP 0x0 + + enum { MLX4_COMM_CMD_RESET, MLX4_COMM_CMD_VHCR0, @@ -190,6 +196,7 @@ struct mlx4_vhcr { struct mlx4_vhcr_cmd { __be64 in_param; __be32 in_modifier; + u32 reserved1; __be64 out_param; __be16 token; u16 reserved; @@ -221,21 +228,24 @@ extern int mlx4_debug_level; #define mlx4_dbg(mdev, format, ...) \ do { \ if (mlx4_debug_level) \ - dev_printk(KERN_DEBUG, &(mdev)->pdev->dev, format, \ + dev_printk(KERN_DEBUG, \ + &(mdev)->persist->pdev->dev, format, \ ##__VA_ARGS__); \ } while (0) #define mlx4_err(mdev, format, ...) \ - dev_err(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_info(mdev, format, ...) \ - dev_info(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_warn(mdev, format, ...) \ - dev_warn(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; +extern int mlx4_internal_err_reset; -#define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF) +#define MLX4_MAX_NUM_SLAVES (min(MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF, \ + MLX4_MFUNC_MAX)) #define ALL_SLAVES 0xff struct mlx4_bitmap { @@ -606,7 +616,6 @@ struct mlx4_mgm { struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; - struct mutex hcr_mutex; struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; @@ -877,6 +886,8 @@ struct mlx4_priv { int reserved_mtts; int fs_hash_mode; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + struct mlx4_port_map v2p; /* cached port mapping configuration */ + struct mutex bond_mutex; /* for bond mode */ __be64 slave_node_guids[MLX4_MFUNC_MAX]; atomic_t opreq_count; @@ -994,7 +1005,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); void mlx4_start_catas_poll(struct mlx4_dev *dev); void mlx4_stop_catas_poll(struct mlx4_dev *dev); -void mlx4_catas_init(void); +int mlx4_catas_init(struct mlx4_dev *dev); +void mlx4_catas_end(struct mlx4_dev *dev); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); @@ -1160,13 +1172,14 @@ enum { int mlx4_cmd_init(struct mlx4_dev *dev); void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask); int mlx4_multi_func_init(struct mlx4_dev *dev); +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev); void mlx4_multi_func_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); int mlx4_cmd_use_events(struct mlx4_dev *dev); void mlx4_cmd_use_polling(struct mlx4_dev *dev); int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, - unsigned long timeout); + u16 op, unsigned long timeout); void mlx4_cq_tasklet_cb(unsigned long data); void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn); @@ -1176,7 +1189,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); -void mlx4_handle_catas_err(struct mlx4_dev *dev); +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist); int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type); @@ -1354,6 +1367,7 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); int mlx4_config_mad_demux(struct mlx4_dev *dev); +int mlx4_do_bond(struct mlx4_dev *dev, bool enable); enum mlx4_zone_flags { MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 944a112dff37..2a8268e6be15 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -390,6 +390,7 @@ struct mlx4_en_dev { struct pci_dev *pdev; struct mutex state_lock; struct net_device *pndev[MLX4_MAX_PORTS + 1]; + struct net_device *upper[MLX4_MAX_PORTS + 1]; u32 port_cnt; bool device_up; struct mlx4_en_profile profile; @@ -410,6 +411,7 @@ struct mlx4_en_dev { unsigned long overflow_period; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_info; + struct notifier_block nb; }; @@ -845,6 +847,9 @@ int mlx4_en_reset_config(struct net_device *dev, struct hwtstamp_config ts_config, netdev_features_t new_features); +int mlx4_en_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr); + /* * Functions for time stamping */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 7094a9c70fd5..78f51e103880 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -598,14 +598,11 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, if (err) return err; - mpt_entry->start = cpu_to_be64(mr->iova); - mpt_entry->length = cpu_to_be64(mr->size); - mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); - - mpt_entry->pd_flags &= cpu_to_be32(MLX4_MPT_PD_MASK | - MLX4_MPT_PD_FLAG_EN_INV); - mpt_entry->flags &= cpu_to_be32(MLX4_MPT_FLAG_FREE | - MLX4_MPT_FLAG_SW_OWNS); + mpt_entry->start = cpu_to_be64(iova); + mpt_entry->length = cpu_to_be64(size); + mpt_entry->entity_size = cpu_to_be32(page_shift); + mpt_entry->flags &= ~(cpu_to_be32(MLX4_MPT_FLAG_FREE | + MLX4_MPT_FLAG_SW_OWNS)); if (mr->mtt.order < 0) { mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); mpt_entry->mtt_addr = 0; @@ -708,13 +705,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, if (!mtts) return -ENOMEM; - dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); return 0; @@ -1020,13 +1017,13 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list /* Make sure MPT status is visible before writing MTT entries */ wmb(); - dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); fmr->mpt->key = cpu_to_be32(key); @@ -1155,7 +1152,7 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_free); int mlx4_SYNC_TPT(struct mlx4_dev *dev) { - return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000, - MLX4_CMD_NATIVE); + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 74216071201f..609c59dc854e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -151,11 +151,13 @@ int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) return -ENOMEM; if (mlx4_is_slave(dev)) - offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) / + offset = uar->index % ((int)pci_resource_len(dev->persist->pdev, + 2) / dev->caps.uar_page_size); else offset = uar->index; - uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset; + uar->pfn = (pci_resource_start(dev->persist->pdev, 2) >> PAGE_SHIFT) + + offset; uar->map = NULL; return 0; } @@ -212,7 +214,6 @@ int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node) list_add(&uar->bf_list, &priv->bf_list); } - bf->uar = uar; idx = ffz(uar->free_bf_bmap); uar->free_bf_bmap |= 1 << idx; bf->uar = uar; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 30eb1ead0fe6..9f268f05290a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -553,9 +553,9 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs)) return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1; return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs; @@ -590,10 +590,10 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= gids % vfs) return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1); @@ -644,7 +644,7 @@ void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave) int num_eth_ports, err; int i; - if (slave < 0 || slave > dev->num_vfs) + if (slave < 0 || slave > dev->persist->num_vfs) return; actv_ports = mlx4_get_active_ports(dev, slave); @@ -1214,7 +1214,8 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, return -EINVAL; slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - num_vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + num_vfs = bitmap_weight(slaves_pport.slaves, + dev->persist->num_vfs + 1) - 1; for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { if (!memcmp(priv->port[port].gid_table.roce_gids[i].raw, gid, @@ -1258,7 +1259,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); num_vfs_before += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } /* candidate_slave_gid isn't necessarily the correct slave, but @@ -1288,7 +1289,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); slave_gid += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } } *slave_id = slave_gid; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 1586ecce13c7..2bb8553bd905 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -882,6 +882,8 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { context->flags &= cpu_to_be32(~(0xf << 28)); context->flags |= cpu_to_be32(states[i + 1] << 28); + if (states[i + 1] != MLX4_QP_STATE_RTR) + context->params2 &= ~MLX4_QP_BIT_FPP; err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], context, 0, 0, qp); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c index ea1c6d092145..0076d88587ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/reset.c +++ b/drivers/net/ethernet/mellanox/mlx4/reset.c @@ -76,19 +76,21 @@ int mlx4_reset(struct mlx4_dev *dev) goto out; } - pcie_cap = pci_pcie_cap(dev->pdev); + pcie_cap = pci_pcie_cap(dev->persist->pdev); for (i = 0; i < 64; ++i) { if (i == 22 || i == 23) continue; - if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) { + if (pci_read_config_dword(dev->persist->pdev, i * 4, + hca_header + i)) { err = -ENODEV; mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n"); goto out; } } - reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE, + reset = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_RESET_BASE, MLX4_RESET_SIZE); if (!reset) { err = -ENOMEM; @@ -122,8 +124,8 @@ int mlx4_reset(struct mlx4_dev *dev) end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES; do { - if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) && - vendor != 0xffff) + if (!pci_read_config_word(dev->persist->pdev, PCI_VENDOR_ID, + &vendor) && vendor != 0xffff) break; msleep(1); @@ -138,14 +140,16 @@ int mlx4_reset(struct mlx4_dev *dev) /* Now restore the PCI headers */ if (pcie_cap) { devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_DEVCTL, devctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express Device Control register, aborting\n"); goto out; } linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_LNKCTL, linkctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n"); @@ -157,7 +161,8 @@ int mlx4_reset(struct mlx4_dev *dev) if (i * 4 == PCI_COMMAND) continue; - if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) { + if (pci_write_config_dword(dev->persist->pdev, i * 4, + hca_header[i])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n", i); @@ -165,7 +170,7 @@ int mlx4_reset(struct mlx4_dev *dev) } } - if (pci_write_config_dword(dev->pdev, PCI_COMMAND, + if (pci_write_config_dword(dev->persist->pdev, PCI_COMMAND, hca_header[PCI_COMMAND / 4])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4efbd1eca611..486e3d26cd4a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -309,12 +309,13 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, int allocated, free, reserved, guaranteed, from_free; int from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return -EINVAL; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; free = (port > 0) ? res_alloc->res_port_free[port - 1] : res_alloc->res_free; @@ -352,7 +353,8 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, if (!err) { /* grant the request */ if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] += count; res_alloc->res_port_free[port - 1] -= count; res_alloc->res_port_rsvd[port - 1] -= from_rsvd; } else { @@ -376,13 +378,14 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, &priv->mfunc.master.res_tracker.res_alloc[res_type]; int allocated, guaranteed, from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; guaranteed = res_alloc->guaranteed[slave]; @@ -397,7 +400,8 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, } if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] -= count; res_alloc->res_port_free[port - 1] += count; res_alloc->res_port_rsvd[port - 1] += from_rsvd; } else { @@ -415,7 +419,8 @@ static inline void initialize_res_quotas(struct mlx4_dev *dev, enum mlx4_resource res_type, int vf, int num_instances) { - res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); + res_alloc->guaranteed[vf] = num_instances / + (2 * (dev->persist->num_vfs + 1)); res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; if (vf == mlx4_master_func_num(dev)) { res_alloc->res_free = num_instances; @@ -486,21 +491,26 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[i]; - res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); - res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->quota = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); + res_alloc->guaranteed = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (i == RES_MAC || i == RES_VLAN) res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * - (dev->num_vfs + 1) * sizeof(int), - GFP_KERNEL); + (dev->persist->num_vfs + + 1) * + sizeof(int), GFP_KERNEL); else - res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->allocated = kzalloc((dev->persist-> + num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (!res_alloc->quota || !res_alloc->guaranteed || !res_alloc->allocated) goto no_mem_err; spin_lock_init(&res_alloc->alloc_lock); - for (t = 0; t < dev->num_vfs + 1; t++) { + for (t = 0; t < dev->persist->num_vfs + 1; t++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, t); switch (i) { @@ -2531,7 +2541,7 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, /* Make sure that the PD bits related to the slave id are zeros. */ pd = mr_get_pd(inbox->buf); pd_slave = (pd >> 17) & 0x7f; - if (pd_slave != 0 && pd_slave != slave) { + if (pd_slave != 0 && --pd_slave != slave) { err = -EPERM; goto ex_abort; } @@ -2934,6 +2944,9 @@ static int verify_qp_parameters(struct mlx4_dev *dev, qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; optpar = be32_to_cpu(*(__be32 *) inbox->buf); + if (slave != mlx4_master_func_num(dev)) + qp_ctx->params2 &= ~MLX4_QP_BIT_FPP; + switch (qp_type) { case MLX4_QP_ST_RC: case MLX4_QP_ST_XRC: @@ -4667,7 +4680,6 @@ static void rem_slave_eqs(struct mlx4_dev *dev, int slave) int state; LIST_HEAD(tlist); int eqn; - struct mlx4_cmd_mailbox *mailbox; err = move_all_busy(dev, slave, RES_EQ); if (err) @@ -4693,20 +4705,13 @@ static void rem_slave_eqs(struct mlx4_dev *dev, int slave) break; case RES_EQ_HW: - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) { - cond_resched(); - continue; - } - err = mlx4_cmd_box(dev, slave, 0, - eqn & 0xff, 0, - MLX4_CMD_HW2SW_EQ, - MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_NATIVE); + err = mlx4_cmd(dev, slave, eqn & 0xff, + 1, MLX4_CMD_HW2SW_EQ, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_eqs: failed to move slave %d eqs %d to SW ownership\n", slave, eqn); - mlx4_free_cmd_mailbox(dev, mailbox); atomic_dec(&eq->mtt->ref_count); state = RES_EQ_RESERVED; break; |